airbytehq
diff --git a/‎airbyte_cdk/sources/declarative/declarative_component_schema.yaml‎
Lines changed: 39 additions & 0 deletions b/‎airbyte_cdk/sources/declarative/declarative_component_schema.yaml‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py‎
Lines changed: 28 additions & 0 deletions b/‎airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py‎
Lines changed: 28 additions & 0 deletions
@@ -2628,6 +2628,40 @@ definitions:
       type:
         type: string
         enum: [JsonDecoder]
+  # Streaming alternative to JsonDecoder for one very large JSON document.
+  # Presumably backed by JsonItemsParser in decoders/composite_raw_decoder.py
+  # -- confirm against the component factory wiring.
+  JsonItemsDecoder:
+    title: JSON Items (Streaming)
+    description: >-
+      Select 'JSON Items (Streaming)' to stream-decode a single JSON document
+      by yielding each element of a nested array, one at a time. Use this for
+      very large single-document JSON responses (e.g. a wrapping object
+      containing a multi-GB array) where buffering the whole document into
+      memory would cause out-of-memory errors. Powered by the `ijson`
+      streaming parser.
+    type: object
+    # `items_path` is mandatory; `encoding` is optional and defaults to utf-8.
+    required:
+      - type
+      - items_path
+    properties:
+      type:
+        type: string
+        enum: [JsonItemsDecoder]
+      # Must name the array itself: the parser is expected to append ijson's
+      # `.item` suffix to this value, so do not add it here.
+      items_path:
+        title: Items Path
+        description: >-
+          Dot-separated path to the JSON array whose elements should be
+          yielded as records. Uses `ijson` path syntax (e.g. `data.users`),
+          not JSONPath syntax — do not include leading `$.` or trailing
+          `[*]`.
+        type: string
+        examples:
+          - dataByDepartmentAndSearchTerm
+          - dataByAsin
+          - data.users
+      encoding:
+        title: Encoding
+        description: Text encoding used to decode the streamed bytes before JSON parsing.
+        type: string
+        default: utf-8
   JsonlDecoder:
     title: JSON Lines
     description: Select 'JSON Lines' if the response consists of JSON objects separated by new lines ('\n') in JSONL format.
@@ -2869,6 +2903,7 @@ definitions:
           - "$ref": "#/definitions/CsvDecoder"
           - "$ref": "#/definitions/GzipDecoder"
           - "$ref": "#/definitions/JsonDecoder"
+          - "$ref": "#/definitions/JsonItemsDecoder"
           - "$ref": "#/definitions/JsonlDecoder"
   ListPartitionRouter:
     title: List Partition Router
@@ -3909,6 +3944,7 @@ definitions:
         description: Component decoding the response so records can be extracted.
         anyOf:
           - "$ref": "#/definitions/JsonDecoder"
+          - "$ref": "#/definitions/JsonItemsDecoder"
           - "$ref": "#/definitions/XmlDecoder"
           - "$ref": "#/definitions/CsvDecoder"
           - "$ref": "#/definitions/JsonlDecoder"
@@ -3997,6 +4033,7 @@ definitions:
           - "$ref": "#/definitions/CsvDecoder"
           - "$ref": "#/definitions/GzipDecoder"
           - "$ref": "#/definitions/JsonDecoder"
+          - "$ref": "#/definitions/JsonItemsDecoder"
           - "$ref": "#/definitions/JsonlDecoder"
   CsvDecoder:
     title: CSV
@@ -4163,6 +4200,7 @@ definitions:
           - "$ref": "#/definitions/CsvDecoder"
           - "$ref": "#/definitions/GzipDecoder"
           - "$ref": "#/definitions/JsonDecoder"
+          - "$ref": "#/definitions/JsonItemsDecoder"
           - "$ref": "#/definitions/JsonlDecoder"
           - "$ref": "#/definitions/IterableDecoder"
           - "$ref": "#/definitions/XmlDecoder"
@@ -4175,6 +4213,7 @@ definitions:
           - "$ref": "#/definitions/CsvDecoder"
           - "$ref": "#/definitions/GzipDecoder"
           - "$ref": "#/definitions/JsonDecoder"
+          - "$ref": "#/definitions/JsonItemsDecoder"
           - "$ref": "#/definitions/JsonlDecoder"
           - "$ref": "#/definitions/IterableDecoder"
           - "$ref": "#/definitions/XmlDecoder"
 
@@ -11,6 +11,7 @@
 from io import BufferedIOBase, TextIOWrapper
 from typing import Any, List, Optional
 
+import ijson
 import orjson
 import requests
 
@@ -98,6 +99,33 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
                 logger.warning(f"Cannot decode/parse line {line!r} as JSON, error: {e}")
 
 
+@dataclass
+class JsonItemsParser(Parser):
+    """Streaming JSON parser that yields each element of a nested array.
+
+    Use this for very large single-document JSON responses where the records
+    of interest live under a nested array (e.g. `dataByDepartmentAndSearchTerm`,
+    `data.users`). Powered by `ijson`, this parser does not materialize the
+    full document — peak memory is bounded by a single record plus ijson's
+    internal parse buffers, regardless of document size.
+
+    `items_path` uses `ijson` dotted path syntax (e.g. `data.users`), not
+    JSONPath syntax (`$.data.users[*]`). Internally we append `.item`, which
+    is the `ijson` convention for "iterate elements of this array".
+    """
+
+    items_path: str = ""
+    encoding: Optional[str] = "utf-8"
+
+    def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
+        if not self.items_path:
+            raise ValueError("JsonItemsParser requires a non-empty items_path.")
+        # ijson auto-selects the best available backend (yajl2_c when present)
+        # and reads from `data` lazily — it does not call `.read()` on the
+        # whole stream up front.
+        yield from ijson.items(data, f"{self.items_path}.item")
+
+
 @dataclass
 class CsvParser(Parser):
     # TODO: migrate implementation to re-use file-base classes