From 5b30fcaad8f0f41362696d4299658452217f8e86 Mon Sep 17 00:00:00 2001
From: tallison
Date: Mon, 11 May 2026 09:24:40 -0400
Subject: [PATCH 1/7] update parse modes and configuration.adoc

---
 .../ROOT/pages/pipes/configuration.adoc       |   2 +-
 docs/modules/ROOT/pages/pipes/index.adoc      |   2 +-
 .../modules/ROOT/pages/pipes/parse-modes.adoc | 143 +++++++++++++-----
 3 files changed, 103 insertions(+), 44 deletions(-)

diff --git a/docs/modules/ROOT/pages/pipes/configuration.adoc b/docs/modules/ROOT/pages/pipes/configuration.adoc
index c6614e7811e..e9c75ab0603 100644
--- a/docs/modules/ROOT/pages/pipes/configuration.adoc
+++ b/docs/modules/ROOT/pages/pipes/configuration.adoc
@@ -98,7 +98,7 @@ See also xref:pipes/timeouts.adoc[Timeouts] for the full timeout model.

|`parseMode`
|`RMETA`
-|How embedded documents are handled: `RMETA` (recursive metadata list), `CONCATENATE`, `CONTENT_ONLY`, `UNPACK`. See xref:pipes/parse-modes.adoc[Parse Modes].
+|How embedded documents are handled: `RMETA` (recursive metadata list), `CONCATENATE`, `CONTENT_ONLY`, `NO_PARSE`, `UNPACK`. See xref:pipes/parse-modes.adoc[Parse Modes].

|`onParseException`
|`EMIT`
diff --git a/docs/modules/ROOT/pages/pipes/index.adoc b/docs/modules/ROOT/pages/pipes/index.adoc
index 796f9d7f1f1..7bd20782388 100644
--- a/docs/modules/ROOT/pages/pipes/index.adoc
+++ b/docs/modules/ROOT/pages/pipes/index.adoc
@@ -48,7 +48,7 @@ against problematic files.
* xref:pipes/iterators.adoc[Iterators] -- document enumeration (directory walk, S3 listing, CSV, JDBC, Kafka, etc.)
* xref:pipes/reporters.adoc[Reporters] -- track per-document processing status
* xref:pipes/configuration.adoc[Pipeline Configuration] -- numClients, timeouts, JVM args, parse modes, emit batching
-* xref:pipes/parse-modes.adoc[Parse Modes] -- control how documents are parsed and emitted (`RMETA`, `CONCATENATE`, `CONTENT_ONLY`, `UNPACK`)
+* xref:pipes/parse-modes.adoc[Parse Modes] -- control how documents are parsed and emitted (`RMETA`, `CONCATENATE`, `CONTENT_ONLY`, `NO_PARSE`, `UNPACK`)
* xref:pipes/unpack-config.adoc[Extracting Embedded Bytes] -- extract raw bytes from embedded documents
* xref:pipes/timeouts.adoc[Timeouts] -- two-tier timeout system for handling long-running and hung parsers
diff --git a/docs/modules/ROOT/pages/pipes/parse-modes.adoc b/docs/modules/ROOT/pages/pipes/parse-modes.adoc
index a023d0b4062..2a1af6a5936 100644
--- a/docs/modules/ROOT/pages/pipes/parse-modes.adoc
+++ b/docs/modules/ROOT/pages/pipes/parse-modes.adoc
@@ -16,6 +16,8 @@
//

= Parse Modes
+:toc:
+:toclevels: 3

Tika Pipes uses `ParseMode` to control how documents are parsed and how results are emitted.
The parse mode is set on the `ParseContext` or configured in `PipesConfig`.
@@ -27,28 +29,60 @@
|Mode |Description

|`RMETA`
-|Default mode. Each embedded document produces a separate `Metadata` object.
-Results are returned as a JSON array of metadata objects.
+|Default mode. Each embedded document produces its own `Metadata` object.
+Results are returned as a JSON array of metadata objects, preserving per-embedded metadata.

|`CONCATENATE`
-|All content from embedded documents is concatenated into a single content field.
-Results are returned as a single `Metadata` object with all metadata preserved.
+|All embedded-document text is concatenated into a single content field on the **container's** `Metadata` object.
+Per-embedded metadata is **not** retained in the result. See <<concatenate-mode>>.
|`CONTENT_ONLY`
-|Parses like `CONCATENATE` but emits only the raw extracted content — no JSON wrapper,
-no metadata fields. Useful when you want just the text, markdown, or HTML output.
+|Same parsing as `CONCATENATE`, but emitters write only the raw content — no JSON wrapper,
+no metadata fields. See <<content-only-mode>>.

|`NO_PARSE`
-|Skip parsing entirely. Useful for pipelines that only need to fetch and emit raw bytes.
+|Skips parsing. Container-level MIME detection and digesting (if configured) still run.
+See <<no-parse-mode>>.

|`UNPACK`
|Extract raw bytes from embedded documents. See xref:pipes/unpack-config.adoc[Extracting Embedded Bytes].
|===
+
+== Content Handler Types
+
+The content handler type determines the format of the extracted text. It is set on the
+`ContentHandlerFactory` configured in `parseContext` (or via the CLI `-h` flag), and applies
+to all modes that produce content (`RMETA`, `CONCATENATE`, `CONTENT_ONLY`).
+
+[cols="1,1,2"]
+|===
+|Handler |Extension |Description
+
+|`t` (text)
+|`.txt`
+|Plain text output
+
+|`h` (html)
+|`.html`
+|HTML output
+
+|`x` (xml)
+|`.xml`
+|XHTML output
+
+|`m` (markdown)
+|`.md`
+|Markdown output
+
+|`b` (body)
+|`.txt`
+|Body content handler output (text from the document body only)
+|===
+
+[#concatenate-mode]
== CONCATENATE Mode

-`CONCATENATE` merges all content from embedded documents into a single content field
-while preserving all metadata from parsing:
+`CONCATENATE` merges all extracted text — from the container and all embedded documents — into a single content field on the container's `Metadata` object.

[source,json]
----
{
@@ -59,12 +93,28 @@ while preserving all metadata from parsing:
}
----

-The result is a single `Metadata` object containing the concatenated content in
-`X-TIKA:content` along with all other metadata fields (title, author, content type, etc.).
+=== What's in the result
+
+* A **single** `Metadata` object (the container's).
+* `X-TIKA:content` contains the concatenated text of the container and all reachable embedded documents.
+* Container-level metadata fields (title, author, content type, etc.) are present.
+* The handler type used is recorded in `X-TIKA:content_handler_type`.
+
+=== What's NOT in the result
+
+* **Per-embedded-document metadata is discarded.** If an embedded PDF has its own title and author, those values are not in the output. Only the container's metadata is returned. Use `RMETA` if you need per-embedded metadata.
+* Individual embedded-document parse exceptions are not surfaced as separate entries. They are handled by Tika's embedded document extractor and may appear as embedded-exception fields on the container metadata, but there is no per-embedded `Metadata` object to inspect.
+
+=== Container-level exceptions
+
+If the container parse fails (`SAXException`, `EncryptedDocumentException`, or any other `Exception`), the stack trace is caught, logged, and stored on the container metadata as `X-TIKA:container_exception`. The parse continues to a return value rather than throwing — callers must check this field if they need to detect failure.
+If the configured write limit is reached during concatenation, `X-TIKA:write_limit_reached` is set to `true`.
+
+[#content-only-mode]
== CONTENT_ONLY Mode

-`CONTENT_ONLY` is designed for use cases where you want just the extracted content
+`CONTENT_ONLY` is designed for cases where you want just the extracted content
written to storage — no JSON wrapping, no metadata overhead.
This is particularly useful for: @@ -81,22 +131,20 @@ useful for: } ---- -=== How It Works +=== How it works -1. Documents are parsed identically to `CONCATENATE` mode — all embedded content is - merged into a single content field. -2. A metadata filter automatically strips all metadata except `X-TIKA:content` and - `X-TIKA:CONTAINER_EXCEPTION` (for error tracking). +1. Documents are parsed identically to `CONCATENATE` mode — all embedded text is merged into the container's content field, and the same caveats around per-embedded metadata apply. +2. A metadata filter automatically strips all metadata except `X-TIKA:content` and `X-TIKA:container_exception` (for error tracking). 3. When the emitter is a `StreamEmitter` (such as the filesystem or S3 emitter), the raw content string is written directly as bytes — no JSON serialization. -=== Metadata Filtering +=== Metadata filtering By default, `CONTENT_ONLY` mode applies an `IncludeFieldMetadataFilter` that retains -only `X-TIKA:content` and `X-TIKA:CONTAINER_EXCEPTION`. If you set your own +only `X-TIKA:content` and `X-TIKA:container_exception`. If you set your own `MetadataFilter` on the `ParseContext`, your filter takes priority. -=== CLI Usage +=== CLI usage The `tika-async-cli` batch processor supports `CONTENT_ONLY` via the `--content-only` flag: @@ -107,33 +155,44 @@ java -jar tika-async-cli.jar -i /input -o /output -h m --content-only ---- This produces `.md` files (when using the `m` handler type) containing only the -extracted markdown content. +extracted markdown content. See <<_content_handler_types>> for the available handler types. -=== Content Handler Types +[#no-parse-mode] +== NO_PARSE Mode -The content format depends on the configured handler type: +`NO_PARSE` skips parsing entirely. The container's content type is still detected, and any configured digester still runs against the raw bytes. No text is extracted, no embedded documents are recursed into. -[cols="1,1,2"] -|=== -|Handler |Extension |Description +[source,json] +---- +{ + "parseContext": { + "parseMode": "NO_PARSE" + } +} +---- -|`t` (text) -|`.txt` -|Plain text output +=== What still runs -|`h` (html) -|`.html` -|HTML output +* **MIME detection.** The configured `Detector` runs against the input stream and populates `Content-Type` and `X-TIKA:content_type_parser_override` on the container metadata. +* **Digesting.** If a `DigesterFactory` is configured on the `ParseContext`, it runs against the raw bytes and writes the digest fields (e.g., `X-TIKA:digest:SHA256`) to the container metadata before the parse-mode check. -|`x` (xml) -|`.xml` -|XHTML output +=== What does NOT run -|`m` (markdown) -|`.md` -|Markdown output +* No parser is invoked. `X-TIKA:content` is empty. +* No embedded documents are extracted. +* No content handler is constructed (handler-type configuration is ignored for this mode). -|`b` (body) -|`.txt` -|Body content handler output -|=== +=== When to use + +* **Fetch-and-emit pipelines** that move bytes from one store to another and need only the content type and a fixed-bytes digest for downstream routing or deduplication. +* **Hash-only inventories** of large corpora where parsing every document is too expensive but a stable digest per file is required. +* **MIME triage**: detect content types across a large set so a downstream pipeline can pick the right parser, parse mode, or skip rule. 
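+
+For illustration, the container metadata emitted for a PDF processed in `NO_PARSE` mode
+with a SHA-256 digester configured might look roughly like the sketch below. The digest
+value is illustrative, and the exact field set depends on your detector and digester
+configuration; no `X-TIKA:content` is populated.
+
+[source,json]
+----
+{
+  "Content-Type": "application/pdf",
+  "X-TIKA:content_type_parser_override": "application/pdf",
+  "X-TIKA:digest:SHA256": "b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"
+}
+----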
+ +Because digest and detection run in `_preParse` regardless of parse mode, switching between `NO_PARSE` and the parsing modes leaves digest values stable for the same input — useful for cross-stage joins. + +[#unpack-mode] +== UNPACK Mode + +`UNPACK` extracts the raw bytes of embedded documents (rather than their parsed text) and emits them via the configured emitter. See xref:pipes/unpack-config.adoc[Extracting Embedded Bytes] for the full configuration model. + +The recursive parsing pass for `UNPACK` uses the same code path as `RMETA`; the difference is at setup and emit time, where mandatory byte extraction is enabled and emitted bytes are routed through the `UnpackHandler`. From 87b5cc2bbc386e7bbbeff815570f5f4efed042f9 Mon Sep 17 00:00:00 2001 From: tallison Date: Mon, 11 May 2026 11:43:31 -0400 Subject: [PATCH 2/7] add file system docs --- .../ROOT/pages/pipes/plugins/filesystem.adoc | 255 ++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 docs/modules/ROOT/pages/pipes/plugins/filesystem.adoc diff --git a/docs/modules/ROOT/pages/pipes/plugins/filesystem.adoc b/docs/modules/ROOT/pages/pipes/plugins/filesystem.adoc new file mode 100644 index 00000000000..85fba5889e2 --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/filesystem.adoc @@ -0,0 +1,255 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += File System Plugin +:toc: +:toclevels: 3 + +The File System plugin (`tika-pipes-file-system`) is the most common starting point for Tika Pipes. It provides all four interfaces — fetcher, emitter, iterator, and reporter — backed by the local (or mounted) filesystem. + +[cols="2,1,3"] +|=== +|Interface |Component name |Class + +|Fetcher +|`file-system-fetcher` +|`FileSystemFetcher` + +|Emitter +|`file-system-emitter` +|`FileSystemEmitter` + +|Iterator +|`file-system-pipes-iterator` +|`FileSystemPipesIterator` + +|Reporter +|`file-system-reporter` +|`FileSystemStatusReporter` +|=== + +== Complete Pipeline Example + +The example below is the canonical filesystem-to-filesystem integration test config. Tokens like `FETCHER_BASE_PATH`, `EMITTER_BASE_PATH`, and `PLUGINS_PATHS` are placeholders the test harness substitutes; replace them with real paths in your own config. + +[source,json,subs=none] +---- +include::example$pipes-fs-pipeline.json[] +---- + +icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json[View source on GitHub] + +[#file-system-fetcher] +== File System Fetcher (`file-system-fetcher`) + +Reads files from a local or mounted filesystem. Fetch keys are resolved relative to `basePath`. 
+
+[source,json]
+----
+{
+  "fetchers": {
+    "fsf": {
+      "file-system-fetcher": {
+        "basePath": "/data/input",
+        "extractFileSystemMetadata": true
+      }
+    }
+  }
+}
+----
+
+The outer key (`fsf`) is the fetcher ID — referenced by `pipesIterator.fetcherId` elsewhere in the config.
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`basePath`
+|_required_
+|Base directory for fetch operations. Fetch keys are resolved relative to this path.
+
+|`extractFileSystemMetadata`
+|`false`
+|When `true`, attach file size, created, and modified timestamps to the metadata of each fetched document.
+
+|`allowAbsolutePaths`
+|`false`
+|When `true`, fetch keys may be absolute paths and `basePath` may be omitted. Use sparingly — see <<security-notes>>.
+|===
+
+[#file-system-emitter]
+== File System Emitter (`file-system-emitter`)
+
+Writes parsed results as files under `basePath`. The relative output path is derived from the emit key of each `FetchEmitTuple`.
+
+[source,json]
+----
+{
+  "emitters": {
+    "fse": {
+      "file-system-emitter": {
+        "basePath": "/data/output",
+        "fileExtension": "json",
+        "onExists": "EXCEPTION",
+        "prettyPrint": false
+      }
+    }
+  }
+}
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`basePath`
+|_required_
+|Base output directory. The emit key is resolved relative to this path.
+
+|`fileExtension`
+|`json`
+|Extension appended to each output file. For `CONTENT_ONLY` mode, set this to match the handler type (`txt`, `html`, `md`, `xml`).
+
+|`onExists`
+|`EXCEPTION`
+|Behavior when the output file already exists: `SKIP` (do nothing), `REPLACE` (overwrite), `EXCEPTION` (fail loudly).
+
+|`prettyPrint`
+|`false`
+|Pretty-print JSON output. Has no effect in `CONTENT_ONLY` mode (raw bytes are written).
+|===
+
+[#file-system-iterator]
+== File System Iterator (`file-system-pipes-iterator`)
+
+Recursively walks a directory tree, emitting one `FetchEmitTuple` per file found.
+
+[source,json]
+----
+{
+  "pipes-iterator": {
+    "file-system-pipes-iterator": {
+      "basePath": "/data/input",
+      "countTotal": true,
+      "fetcherId": "fsf",
+      "emitterId": "fse"
+    }
+  }
+}
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`basePath`
+|_required_
+|Root directory to walk.
+
+|`countTotal`
+|`true`
+|If `true`, walks the tree once to count files before processing begins. Enables progress reporting at the cost of an extra scan over the tree.
+
+|`fetcherId` / `emitterId`
+|_required_
+|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract.
+|===
+
+=== Notes
+
+* Walk order is filesystem-dependent and not guaranteed stable across runs.
+* The relative path of each file (from `basePath`) becomes the fetch key, and by default also the emit key.
+* Symbolic links are followed.
+
+[#file-system-reporter]
+== File System Reporter (`file-system-reporter`)
+
+Maintains a JSON status file that summarizes pipeline progress. The reporter writes the file periodically on a background thread; per-record `report()` calls only update in-memory counters.
+
+[source,json]
+----
+{
+  "pipes-reporters": {
+    "file-system-reporter": {
+      "statusFile": "/var/log/tika/status.json",
+      "reportUpdateMs": 1000
+    }
+  }
+}
+----
+
+`pipes-reporters` accepts multiple reporters keyed by type name — see xref:pipes/reporters.adoc[Pipes Reporters] for how multiple reporters compose; a sketch follows below.
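+
+For example, a config that registers the filesystem reporter alongside the OpenSearch
+reporter might look like the sketch below; the `opensearch-reporter` component name and
+its `connection` field are assumptions for illustration, so check that plugin's page for
+its actual fields.
+
+[source,json]
+----
+{
+  "pipes-reporters": {
+    "file-system-reporter": {
+      "statusFile": "/var/log/tika/status.json",
+      "reportUpdateMs": 1000
+    },
+    "opensearch-reporter": {
+      "connection": "https://localhost:9200/tika-status"
+    }
+  }
+}
+----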
+ +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`statusFile` +|_required_ +|Path of the JSON status file. The file is created on first write and overwritten in place. + +|`reportUpdateMs` +|_no default_ +|Interval in milliseconds between status-file writes. Typical values: `1000` for a low-overhead heartbeat, `100` for near-real-time updates. There is no built-in default — always set this explicitly. +|=== + +=== Status file schema + +The reporter serializes an `AsyncStatus` object to JSON, containing: + +* `asyncStatus` — current pipeline phase (`STARTED`, `COMPLETED`, `CRASHED`). +* `counts` — map of `RESULT_STATUS` to count (e.g., `PARSE_SUCCESS`, `PARSE_EXCEPTION`, `TIMEOUT`, `OOM`). +* `totalCountResult` — total documents processed and whether the enumeration is complete. +* `timestamp` — when the file was last written. +* `crashMessage` — populated only on fatal pipeline failure. + +The file is rewritten in full on each tick, not appended. + +[#watching] +=== Live status for watching applications + +The reporter is designed to support external "watchers" — UIs, dashboards, or monitoring scripts that poll the status file to display pipeline progress. To use it that way, set `reportUpdateMs` to match your desired refresh rate: + +[source,json] +---- +"reportUpdateMs": 250 +---- + +The watcher polls `statusFile` on its own interval and reads the most recent snapshot. Because the file is rewritten in full with the latest status, watchers do not need to handle partial reads. + +This pattern is used by `tika-gui-v2` to drive its progress UI: the GUI starts a pipeline subprocess, points the reporter at a temp file, and polls that file every few hundred milliseconds. + +Tradeoffs: + +* Smaller `reportUpdateMs` values mean more disk writes. On a fast SSD this is negligible, but on a slow disk (or NFS) the writer thread can become a bottleneck. +* The reporter thread sleeps between writes, so the worst-case staleness of the file is `reportUpdateMs` milliseconds plus serialization time. +* Per-record `report()` calls are cheap (counter increment only). The cost of "watching" is bounded by the periodic write, not by document throughput. + +[#security-notes] +== Security Notes + +* **`basePath` is a sandbox boundary.** The fetcher and emitter reject fetch/emit keys that resolve outside `basePath`. Do not set `allowAbsolutePaths=true` unless the source of fetch keys is fully trusted — an attacker-controlled fetch key could otherwise read arbitrary files. +* **Symlinks are followed.** A symlink under `basePath` pointing outside `basePath` may still be readable. If you need strict containment, do not allow symlinks in your input tree. +* **Output directories are created automatically.** The emitter creates intermediate directories as needed. Make sure the process's umask is appropriate for the data being written. 
From 9cf2de2c3a170cdbbbdaae2b6480b56d81290c94 Mon Sep 17 00:00:00 2001
From: tallison
Date: Mon, 11 May 2026 11:43:39 -0400
Subject: [PATCH 3/7] add file system docs

---
 .../ROOT/examples/pipes-fs-pipeline.json      |   2 +-
 docs/modules/ROOT/nav.adoc                    |   2 +
 .../ROOT/pages/pipes/getting-started.adoc     |   4 +-
 .../ROOT/pages/pipes/plugins/index.adoc       | 133 ++++++++++++++++++
 4 files changed, 139 insertions(+), 2 deletions(-)
 create mode 100644 docs/modules/ROOT/pages/pipes/plugins/index.adoc

diff --git a/docs/modules/ROOT/examples/pipes-fs-pipeline.json b/docs/modules/ROOT/examples/pipes-fs-pipeline.json
index 5a7538b1416..4b71666add9 120000
--- a/docs/modules/ROOT/examples/pipes-fs-pipeline.json
+++ b/docs/modules/ROOT/examples/pipes-fs-pipeline.json
@@ -1 +1 @@
-../../../../tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
\ No newline at end of file
+../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
\ No newline at end of file
diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
index 979555022a7..ef16b190dde 100644
--- a/docs/modules/ROOT/nav.adoc
+++ b/docs/modules/ROOT/nav.adoc
@@ -31,6 +31,8 @@
** xref:pipes/unpack-config.adoc[Extracting Embedded Bytes]
** xref:pipes/timeouts.adoc[Timeouts]
** xref:pipes/cpu-sizing.adoc[Forked-JVM CPU Sizing]
+** xref:pipes/plugins/index.adoc[Plugins]
+*** xref:pipes/plugins/filesystem.adoc[File System]
* xref:configuration/index.adoc[Configuration]
** xref:configuration/parsers/pdf-parser.adoc[PDF Parser]
** xref:configuration/parsers/tesseract-ocr-parser.adoc[Tesseract OCR]
diff --git a/docs/modules/ROOT/pages/pipes/getting-started.adoc b/docs/modules/ROOT/pages/pipes/getting-started.adoc
index 6ee6c451482..e52e02f1acd 100644
--- a/docs/modules/ROOT/pages/pipes/getting-started.adoc
+++ b/docs/modules/ROOT/pages/pipes/getting-started.adoc
@@ -64,7 +64,9 @@ pipeline:
----
include::example$pipes-fs-pipeline.json[]
----
-icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json[View source on GitHub]
+icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json[View source on GitHub]
+
+NOTE: Values such as `FETCHER_BASE_PATH`, `EMITTER_BASE_PATH`, and `PLUGINS_PATHS` are placeholders that the integration tests substitute at runtime. Replace them with real paths in your own config.

Run it with:

diff --git a/docs/modules/ROOT/pages/pipes/plugins/index.adoc b/docs/modules/ROOT/pages/pipes/plugins/index.adoc
new file mode 100644
index 00000000000..8542fa20343
--- /dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/index.adoc
@@ -0,0 +1,133 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// + += Pipes Plugins + +Tika Pipes is extensible through plugins. Each plugin lives in its own Maven module and can implement one or more of the four pipes extension points: + +* **Fetcher** — retrieves document bytes from a source. +* **Emitter** — writes parsed results to a destination. +* **Iterator** (`PipesIterator`) — enumerates documents to process as `FetchEmitTuple` records. +* **Reporter** (`PipesReporter`) — records per-document processing status. + +Many plugins implement more than one (e.g., the S3 plugin provides fetcher, emitter, and iterator). The pages below document each plugin once, with one section per implemented interface. + +== Plugin / Interface Matrix + +[cols="2,1,1,1,1"] +|=== +|Plugin |Fetcher |Emitter |Iterator |Reporter + +|xref:pipes/plugins/filesystem.adoc[File System] +|✓ +|✓ +|✓ +|✓ + +|xref:pipes/plugins/s3.adoc[Amazon S3] +|✓ +|✓ +|✓ +|— + +|xref:pipes/plugins/gcs.adoc[Google Cloud Storage] +|✓ +|✓ +|✓ +|— + +|xref:pipes/plugins/azblob.adoc[Azure Blob Storage] +|✓ +|✓ +|✓ +|— + +|xref:pipes/plugins/opensearch.adoc[OpenSearch] +|— +|✓ +|— +|✓ + +|xref:pipes/plugins/elasticsearch.adoc[Elasticsearch] +|— +|✓ +|— +|✓ + +|xref:pipes/plugins/solr.adoc[Solr] +|— +|✓ +|✓ +|— + +|xref:pipes/plugins/jdbc.adoc[JDBC] +|— +|✓ +|✓ +|✓ + +|xref:pipes/plugins/kafka.adoc[Kafka] +|— +|✓ +|✓ +|— + +|xref:pipes/plugins/http.adoc[HTTP] +|✓ +|— +|— +|— + +|xref:pipes/plugins/google-drive.adoc[Google Drive] +|✓ +|— +|— +|— + +|xref:pipes/plugins/microsoft-graph.adoc[Microsoft Graph] +|✓ +|— +|— +|— + +|xref:pipes/plugins/atlassian-jwt.adoc[Atlassian JWT] +|✓ +|— +|— +|— + +|xref:pipes/plugins/csv.adoc[CSV] +|— +|— +|✓ +|— + +|xref:pipes/plugins/json.adoc[JSON] +|— +|— +|✓ +|— +|=== + +== Interface Overviews + +For descriptions of the interfaces themselves — their contracts, the shared concepts (`FetchKey`, `FetchEmitTuple`, `baseConfig`, etc.), and how they fit into a pipeline — see: + +* xref:pipes/fetchers.adoc[Fetchers] +* xref:pipes/emitters.adoc[Emitters] +* xref:pipes/iterators.adoc[Pipes Iterators] +* xref:pipes/reporters.adoc[Pipes Reporters] From e6d9e53c1239aa23b6630a5bd3a5646275e45224 Mon Sep 17 00:00:00 2001 From: tallison Date: Mon, 11 May 2026 16:07:05 -0400 Subject: [PATCH 4/7] add s3 --- .../ROOT/examples/pipes-s3-emitter.json | 1 + .../ROOT/examples/pipes-s3-fetcher.json | 1 + .../ROOT/examples/pipes-s3-iterator.json | 1 + .../ROOT/examples/pipes-s3-pipeline.json | 1 + docs/modules/ROOT/nav.adoc | 1 + .../ROOT/pages/pipes/plugins/index.adoc | 2 +- docs/modules/ROOT/pages/pipes/plugins/s3.adoc | 242 ++++++++++++++++++ .../tika/pipes/s3/ConfigExamplesTest.java | 136 ++++++++++ .../resources/config-examples/s3-emitter.json | 14 + .../resources/config-examples/s3-fetcher.json | 15 ++ .../config-examples/s3-pipeline.json | 49 ++++ .../config-examples/s3-pipes-iterator.json | 13 + 12 files changed, 475 insertions(+), 1 deletion(-) create mode 120000 docs/modules/ROOT/examples/pipes-s3-emitter.json create mode 120000 docs/modules/ROOT/examples/pipes-s3-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-s3-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-s3-pipeline.json create mode 100644 docs/modules/ROOT/pages/pipes/plugins/s3.adoc create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java create mode 100644 
tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-fetcher.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipeline.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipes-iterator.json diff --git a/docs/modules/ROOT/examples/pipes-s3-emitter.json b/docs/modules/ROOT/examples/pipes-s3-emitter.json new file mode 120000 index 00000000000..6f05a73ec21 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-s3-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-s3-fetcher.json b/docs/modules/ROOT/examples/pipes-s3-fetcher.json new file mode 120000 index 00000000000..b24bd4fa27a --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-s3-fetcher.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-s3-iterator.json b/docs/modules/ROOT/examples/pipes-s3-iterator.json new file mode 120000 index 00000000000..db1b210e827 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-s3-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-s3-pipeline.json b/docs/modules/ROOT/examples/pipes-s3-pipeline.json new file mode 120000 index 00000000000..cc6f573ec2c --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-s3-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index ef16b190dde..90fce8701d9 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -33,6 +33,7 @@ ** xref:pipes/cpu-sizing.adoc[Forked-JVM CPU Sizing] ** xref:pipes/plugins/index.adoc[Plugins] *** xref:pipes/plugins/filesystem.adoc[File System] +*** xref:pipes/plugins/s3.adoc[Amazon S3] * xref:configuration/index.adoc[Configuration] ** xref:configuration/parsers/pdf-parser.adoc[PDF Parser] ** xref:configuration/parsers/tesseract-ocr-parser.adoc[Tesseract OCR] diff --git a/docs/modules/ROOT/pages/pipes/plugins/index.adoc b/docs/modules/ROOT/pages/pipes/plugins/index.adoc index 8542fa20343..d5173d2032e 100644 --- a/docs/modules/ROOT/pages/pipes/plugins/index.adoc +++ b/docs/modules/ROOT/pages/pipes/plugins/index.adoc @@ -125,7 +125,7 @@ Many plugins implement more than one (e.g., the S3 plugin provides fetcher, emit == Interface Overviews -For descriptions of the interfaces themselves — their contracts, the shared concepts (`FetchKey`, `FetchEmitTuple`, `baseConfig`, etc.), and how they fit into a pipeline — see: +For descriptions of the interfaces themselves — their contracts, the shared concepts (`FetchKey`, `FetchEmitTuple`, `fetcherId`/`emitterId` wiring, etc.), and how they fit into a pipeline — see: * xref:pipes/fetchers.adoc[Fetchers] * xref:pipes/emitters.adoc[Emitters] diff --git a/docs/modules/ROOT/pages/pipes/plugins/s3.adoc b/docs/modules/ROOT/pages/pipes/plugins/s3.adoc new file mode 100644 index 00000000000..90d0960f06e --- 
/dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/s3.adoc
@@ -0,0 +1,242 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Amazon S3 Plugin
+:toc:
+:toclevels: 3
+
+The Amazon S3 plugin (`tika-pipes-s3`) provides fetcher, emitter, and iterator interfaces for objects in S3 (or any S3-compatible service such as MinIO).
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Fetcher
+|`s3-fetcher`
+|`S3Fetcher`
+
+|Emitter
+|`s3-emitter`
+|`S3Emitter`
+
+|Iterator
+|`s3-pipes-iterator`
+|`S3PipesIterator`
+|===
+
+[#credentials]
+== Credentials
+
+All three components share the same `credentialsProvider` selector:
+
+* `profile` — reads credentials from the local AWS profile named by `profile` (e.g., `default`).
+* `instance` — uses the instance/container role attached to the host (EC2 IAM role, ECS task role, etc.). No additional fields needed.
+* `key_secret` — reads `accessKey` and `secretKey` from the config. Avoid checking these into source control; prefer environment-variable substitution or one of the other providers.
+
+The emitter's `validate()` enforces these values, but the fetcher and iterator do not — they fail later when the AWS SDK tries to resolve credentials.
+
+[#s3-fetcher]
+== S3 Fetcher (`s3-fetcher`)
+
+Reads objects from an S3 bucket. The fetch key is the S3 key under `prefix` (if set).
+
+[source,json]
+----
+include::example$pipes-s3-fetcher.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`bucket`
+|_required_
+|S3 bucket name.
+
+|`region`
+|_required_
+|AWS region (e.g., `us-east-1`).
+
+|`prefix`
+|_no default_
+|Optional key prefix. Fetch keys are resolved underneath this prefix.
+
+|`credentialsProvider`
+|_required_
+|One of `profile`, `instance`, `key_secret`. See <<credentials>>.
+
+|`profile` / `accessKey` / `secretKey`
+|_conditional_
+|Required by the matching `credentialsProvider`.
+
+|`spoolToTemp`
+|`true`
+|If `true`, the fetched object is spooled to a temp file before being parsed.
+
+|`extractUserMetadata`
+|`true`
+|If `true`, S3 user-metadata is copied into the parsed `Metadata`.
+
+|`maxConnections`
+|`0`
+|Maximum HTTP connections in the S3 client pool. `0` lets the SDK pick a default.
+
+|`maxLength`
+|`-1`
+|Maximum object size, in bytes. `-1` means no limit.
+
+|`endpointConfigurationService`
+|_no default_
+|Custom S3 endpoint, for S3-compatible services such as MinIO or LocalStack.
+
+|`pathStyleAccessEnabled`
+|`false`
+|Force path-style URLs (e.g., `https://endpoint/bucket/key`). Required by some S3-compatible services.
+
+|`throttleSeconds`
+|_no default_
+|Optional array of retry backoff delays, in seconds; the _n_-th consecutive failure sleeps for the _n_-th value.
+|===
+
+[#s3-emitter]
+== S3 Emitter (`s3-emitter`)
+
+Writes parsed results back to an S3 bucket. The emit key (relative to `prefix`) is derived from the `FetchEmitTuple`.
+
+[source,json]
+----
+include::example$pipes-s3-emitter.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`bucket`
+|_required_
+|Destination S3 bucket name (validated non-blank).
+
+|`region`
+|_required_
+|AWS region (validated non-blank).
+
+|`credentialsProvider`
+|_required_
+|One of `profile`, `instance`, `key_secret` (validated). See <<credentials>>.
+
+|`profile` / `accessKey` / `secretKey`
+|_conditional_
+|Required by the matching `credentialsProvider` (validated).
+
+|`prefix`
+|_no default_
+|Optional key prefix. A trailing `/` is stripped automatically.
+
+|`fileExtension`
+|`json`
+|Extension appended to each emitted key.
+
+|`spoolToTemp`
+|`true`
+|If `true`, output is spooled locally before being uploaded.
+
+|`maxConnections`
+|`50`
+|Maximum HTTP connections in the S3 client pool.
+
+|`endpointConfigurationService`
+|_no default_
+|Custom S3 endpoint, for S3-compatible services.
+
+|`pathStyleAccessEnabled`
+|`false`
+|Force path-style URLs.
+|===
+
+[#s3-iterator]
+== S3 Iterator (`s3-pipes-iterator`)
+
+Lists objects under a bucket/prefix and emits one `FetchEmitTuple` per object found.
+
+[source,json]
+----
+include::example$pipes-s3-iterator.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`bucket`
+|_required_
+|S3 bucket to enumerate.
+
+|`region`
+|_required_
+|AWS region.
+
+|`prefix`
+|`""`
+|Key prefix to scope the listing.
+
+|`credentialsProvider`
+|_optional_
+|One of `profile`, `instance`, `key_secret`. See <<credentials>>.
+
+|`profile` / `accessKey` / `secretKey` / `endpointConfigurationService`
+|_conditional_
+|Auth fields, mirroring the fetcher and emitter.
+
+|`fileNamePattern`
+|_no default_
+|Optional regex; only keys whose name matches are emitted.
+
+|`maxConnections`
+|`50`
+|Maximum HTTP connections in the S3 client pool.
+
+|`pathStyleAccessEnabled`
+|`false`
+|Force path-style URLs.
+
+|`fetcherId` / `emitterId`
+|_required_
+|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract.
+|===
+
+[#s3-pipeline]
+== Complete Pipeline Example
+
+The example below wires the S3 fetcher, emitter, and iterator into a complete pipeline that lists `s3://my-tika-input/incoming/` and writes results to `s3://my-tika-output/results/`.
+
+[source,json]
+----
+include::example$pipes-s3-pipeline.json[]
+----
+
+[#notes]
+== Notes
+
+* The fetcher, emitter, and iterator each maintain their own S3 client. Auth and endpoint settings need to be configured per component, not globally.
+* The S3 SDK enforces TLS 1.2+ by default, so data is encrypted in transit. For at-rest encryption, configure bucket-level SSE on the AWS side.
+* When using `endpointConfigurationService` against MinIO or LocalStack, you almost always need `pathStyleAccessEnabled: true`; a sketch follows below.
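+
+For example, a fetcher aimed at a local MinIO container might look like the sketch below.
+The endpoint URL and the MinIO default credentials are illustrative, and this assumes
+`endpointConfigurationService` takes a plain URL string; prefer environment-variable
+substitution over hard-coding real keys (see <<credentials>>).
+
+[source,json]
+----
+{
+  "fetchers": {
+    "s3f": {
+      "s3-fetcher": {
+        "bucket": "my-tika-input",
+        "region": "us-east-1",
+        "credentialsProvider": "key_secret",
+        "accessKey": "minioadmin",
+        "secretKey": "minioadmin",
+        "endpointConfigurationService": "http://localhost:9000",
+        "pathStyleAccessEnabled": true
+      }
+    }
+  }
+}
+----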
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java new file mode 100644 index 00000000000..f248d8194e4 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/java/org/apache/tika/pipes/s3/ConfigExamplesTest.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.s3; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.emitter.s3.S3EmitterConfig; +import org.apache.tika.pipes.fetcher.s3.config.S3FetcherConfig; +import org.apache.tika.pipes.iterator.s3.S3PipesIteratorConfig; + +/** + * Validates S3 fetcher/emitter/iterator configuration examples used in documentation. + *

+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/} + * and are included directly in the AsciiDoc documentation via the {@code include::} directive. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + private void loadViaTikaLoader(String resourceName) throws Exception { + String json = readExample(resourceName); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + TikaLoader loader = TikaLoader.load(configFile); + assertNotNull(loader, "TikaLoader should not be null for: " + resourceName); + } + + private JsonNode innerComponent(String json, String section, String id, String typeName) + throws Exception { + JsonNode root = OBJECT_MAPPER.readTree(json); + JsonNode sectionNode = root.get(section); + assertNotNull(sectionNode, "Missing section: " + section); + JsonNode idNode = id == null ? sectionNode : sectionNode.get(id); + assertNotNull(idNode, "Missing id: " + id); + JsonNode typed = idNode.get(typeName); + assertNotNull(typed, "Missing type: " + typeName); + return typed; + } + + @Test + public void testS3FetcherConfig() throws Exception { + loadViaTikaLoader("s3-fetcher.json"); + + JsonNode inner = innerComponent(readExample("s3-fetcher.json"), + "fetchers", "s3f", "s3-fetcher"); + S3FetcherConfig config = S3FetcherConfig.load(inner.toString()); + assertEquals("my-tika-input", config.getBucket()); + assertEquals("us-east-1", config.getRegion()); + assertEquals("profile", config.getCredentialsProvider()); + assertEquals("default", config.getProfile()); + } + + @Test + public void testS3EmitterConfig() throws Exception { + loadViaTikaLoader("s3-emitter.json"); + + JsonNode inner = innerComponent(readExample("s3-emitter.json"), + "emitters", "s3e", "s3-emitter"); + S3EmitterConfig config = S3EmitterConfig.load(inner.toString()); + assertEquals("my-tika-output", config.bucket()); + assertEquals("us-east-1", config.region()); + assertEquals("profile", config.credentialsProvider()); + assertEquals("json", config.fileExtension()); + // exercises required-field + credentialsProvider whitelist validation + config.validate(); + } + + @Test + public void testS3IteratorConfig() throws Exception { + loadViaTikaLoader("s3-pipes-iterator.json"); + + JsonNode inner = innerComponent(readExample("s3-pipes-iterator.json"), + "pipes-iterator", null, "s3-pipes-iterator"); + S3PipesIteratorConfig config = S3PipesIteratorConfig.load(inner.toString()); + assertEquals("my-tika-input", config.getBucket()); + assertEquals("us-east-1", config.getRegion()); + assertEquals("s3f", config.getFetcherId()); + assertEquals("s3e", config.getEmitterId()); + } + + @Test + public void testS3PipelineConfig() throws Exception { + loadViaTikaLoader("s3-pipeline.json"); + + String json = readExample("s3-pipeline.json"); + S3FetcherConfig fetcher = S3FetcherConfig.load( + innerComponent(json, "fetchers", "s3f", "s3-fetcher").toString()); + S3EmitterConfig emitter = S3EmitterConfig.load( + innerComponent(json, "emitters", "s3e", "s3-emitter").toString()); + 
S3PipesIteratorConfig iterator = S3PipesIteratorConfig.load( + innerComponent(json, "pipes-iterator", null, "s3-pipes-iterator").toString()); + + emitter.validate(); + assertEquals(fetcher.getBucket(), iterator.getBucket()); + assertEquals("s3f", iterator.getFetcherId()); + assertEquals("s3e", iterator.getEmitterId()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-emitter.json new file mode 100644 index 00000000000..8cd5557db1c --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-emitter.json @@ -0,0 +1,14 @@ +{ + "emitters": { + "s3e": { + "s3-emitter": { + "bucket": "my-tika-output", + "region": "us-east-1", + "prefix": "results/", + "fileExtension": "json", + "credentialsProvider": "profile", + "profile": "default" + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-fetcher.json new file mode 100644 index 00000000000..8047fee2b08 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-fetcher.json @@ -0,0 +1,15 @@ +{ + "fetchers": { + "s3f": { + "s3-fetcher": { + "bucket": "my-tika-input", + "region": "us-east-1", + "prefix": "incoming/", + "credentialsProvider": "profile", + "profile": "default", + "extractUserMetadata": true, + "spoolToTemp": true + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipeline.json new file mode 100644 index 00000000000..1f17aa7081d --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipeline.json @@ -0,0 +1,49 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "s3f": { + "s3-fetcher": { + "bucket": "my-tika-input", + "region": "us-east-1", + "prefix": "incoming/", + "credentialsProvider": "profile", + "profile": "default", + "extractUserMetadata": true + } + } + }, + "emitters": { + "s3e": { + "s3-emitter": { + "bucket": "my-tika-output", + "region": "us-east-1", + "prefix": "results/", + "fileExtension": "json", + "credentialsProvider": "profile", + "profile": "default" + } + } + }, + "pipes-iterator": { + "s3-pipes-iterator": { + "bucket": "my-tika-input", + "region": "us-east-1", + "prefix": "incoming/", + "credentialsProvider": "profile", + "profile": "default", + "fetcherId": "s3f", + "emitterId": "s3e" + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipes-iterator.json new file mode 100644 index 00000000000..e1fb2e98750 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipes-iterator.json @@ -0,0 +1,13 @@ +{ + "pipes-iterator": { + "s3-pipes-iterator": { + "bucket": "my-tika-input", + "region": "us-east-1", + "prefix": "incoming/", + "credentialsProvider": "profile", + "profile": "default", + "fetcherId": "s3f", + "emitterId": 
"s3e" + } + } +} From cbc65c9bb88b85763660bb6d556a3dd87e5601d9 Mon Sep 17 00:00:00 2001 From: tallison Date: Mon, 11 May 2026 16:11:28 -0400 Subject: [PATCH 5/7] gcs --- .../ROOT/examples/pipes-gcs-emitter.json | 1 + .../ROOT/examples/pipes-gcs-fetcher.json | 1 + .../ROOT/examples/pipes-gcs-iterator.json | 1 + .../ROOT/examples/pipes-gcs-pipeline.json | 1 + docs/modules/ROOT/nav.adoc | 1 + .../modules/ROOT/pages/pipes/plugins/gcs.adoc | 166 ++++++++++++++++++ .../tika/pipes/gcs/ConfigExamplesTest.java | 133 ++++++++++++++ .../config-examples/gcs-emitter.json | 12 ++ .../config-examples/gcs-fetcher.json | 12 ++ .../config-examples/gcs-pipeline.json | 42 +++++ .../config-examples/gcs-pipes-iterator.json | 11 ++ 11 files changed, 381 insertions(+) create mode 120000 docs/modules/ROOT/examples/pipes-gcs-emitter.json create mode 120000 docs/modules/ROOT/examples/pipes-gcs-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-gcs-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-gcs-pipeline.json create mode 100644 docs/modules/ROOT/pages/pipes/plugins/gcs.adoc create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-fetcher.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipeline.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipes-iterator.json diff --git a/docs/modules/ROOT/examples/pipes-gcs-emitter.json b/docs/modules/ROOT/examples/pipes-gcs-emitter.json new file mode 120000 index 00000000000..48c994f74ad --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-gcs-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-gcs-fetcher.json b/docs/modules/ROOT/examples/pipes-gcs-fetcher.json new file mode 120000 index 00000000000..8b390e310c0 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-gcs-fetcher.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-gcs-iterator.json b/docs/modules/ROOT/examples/pipes-gcs-iterator.json new file mode 120000 index 00000000000..d4f6b6b9347 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-gcs-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-gcs-pipeline.json b/docs/modules/ROOT/examples/pipes-gcs-pipeline.json new file mode 120000 index 00000000000..621bad767e0 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-gcs-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 90fce8701d9..e5e2a096244 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -34,6 +34,7 @@ ** xref:pipes/plugins/index.adoc[Plugins] *** xref:pipes/plugins/filesystem.adoc[File System] 
*** xref:pipes/plugins/s3.adoc[Amazon S3] +*** xref:pipes/plugins/gcs.adoc[Google Cloud Storage] * xref:configuration/index.adoc[Configuration] ** xref:configuration/parsers/pdf-parser.adoc[PDF Parser] ** xref:configuration/parsers/tesseract-ocr-parser.adoc[Tesseract OCR] diff --git a/docs/modules/ROOT/pages/pipes/plugins/gcs.adoc b/docs/modules/ROOT/pages/pipes/plugins/gcs.adoc new file mode 100644 index 00000000000..d639580d0f7 --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/gcs.adoc @@ -0,0 +1,166 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += Google Cloud Storage Plugin +:toc: +:toclevels: 3 + +The Google Cloud Storage plugin (`tika-pipes-gcs`) provides fetcher, emitter, and iterator interfaces for objects in GCS buckets. + +[cols="2,1,3"] +|=== +|Interface |Component name |Class + +|Fetcher +|`gcs-fetcher` +|`GCSFetcher` + +|Emitter +|`gcs-emitter` +|`GCSEmitter` + +|Iterator +|`gcs-pipes-iterator` +|`GCSPipesIterator` +|=== + +[#credentials] +== Credentials + +The GCS plugin relies on Google's Application Default Credentials chain — there are no credential fields in the JSON config itself. Set credentials by: + +* Running on a GCP service (GCE/GKE/Cloud Run) — uses the attached service account automatically. +* Setting the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of a service-account JSON key. +* Running `gcloud auth application-default login` for local development. + +The `projectId` field in each component selects which GCP project to bill the API calls against; the service account or user must have storage access to the named bucket. + +[#gcs-fetcher] +== GCS Fetcher (`gcs-fetcher`) + +Reads objects from a GCS bucket. The fetch key is the object name. + +[source,json] +---- +include::example$pipes-gcs-fetcher.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`projectId` +|_required_ +|GCP project ID for billing/authentication. + +|`bucket` +|_required_ +|GCS bucket name. + +|`spoolToTemp` +|`true` +|If `true`, the fetched object is spooled to a temp file before parsing. + +|`extractUserMetadata` +|`true` +|If `true`, GCS custom metadata is copied into the parsed `Metadata`. +|=== + +[#gcs-emitter] +== GCS Emitter (`gcs-emitter`) + +Writes parsed results to a GCS bucket. The emit key (relative to `prefix`) is derived from the `FetchEmitTuple`. + +[source,json] +---- +include::example$pipes-gcs-emitter.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`projectId` +|_required_ +|GCP project ID (validated non-blank). + +|`bucket` +|_required_ +|Destination GCS bucket (validated non-blank). + +|`prefix` +|_no default_ +|Optional object-name prefix. A trailing `/` is stripped automatically. 
+ +|`fileExtension` +|`json` +|Extension appended to each emitted object name. +|=== + +[#gcs-iterator] +== GCS Iterator (`gcs-pipes-iterator`) + +Lists objects under a bucket/prefix and emits one `FetchEmitTuple` per object. + +[source,json] +---- +include::example$pipes-gcs-iterator.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`bucket` +|_required_ +|GCS bucket to enumerate. + +|`projectId` +|`""` +|GCP project ID for the listing API call. + +|`prefix` +|`""` +|Object-name prefix to scope the listing. + +|`fetcherId` / `emitterId` +|_required_ +|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract. +|=== + +[#gcs-pipeline] +== Complete Pipeline Example + +The example below wires the GCS fetcher, emitter, and iterator together for a bucket-to-bucket pipeline. + +[source,json] +---- +include::example$pipes-gcs-pipeline.json[] +---- + +[#notes] +== Notes + +* The GCS plugin uses the official `google-cloud-storage` SDK. Set `GOOGLE_APPLICATION_CREDENTIALS` (or rely on workload identity / metadata server) to authenticate. +* Each component creates its own `Storage` client. Heavy throughput should be balanced against your project's per-second request quota. +* Unlike S3, there is no `path-style` toggle — GCS uses a single global endpoint. diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java new file mode 100644 index 00000000000..7cfc1f3fb16 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/java/org/apache/tika/pipes/gcs/ConfigExamplesTest.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.gcs; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.emitter.gcs.GCSEmitterConfig; +import org.apache.tika.pipes.fetcher.gcs.config.GCSFetcherConfig; +import org.apache.tika.pipes.iterator.gcs.GCSPipesIteratorConfig; + +/** + * Validates GCS fetcher/emitter/iterator configuration examples used in documentation. + *
+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/} + * and are included directly in the AsciiDoc documentation via the {@code include::} directive. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + private void loadViaTikaLoader(String resourceName) throws Exception { + String json = readExample(resourceName); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + TikaLoader loader = TikaLoader.load(configFile); + assertNotNull(loader, "TikaLoader should not be null for: " + resourceName); + } + + private JsonNode innerComponent(String json, String section, String id, String typeName) + throws Exception { + JsonNode root = OBJECT_MAPPER.readTree(json); + JsonNode sectionNode = root.get(section); + assertNotNull(sectionNode, "Missing section: " + section); + JsonNode idNode = id == null ? sectionNode : sectionNode.get(id); + assertNotNull(idNode, "Missing id: " + id); + JsonNode typed = idNode.get(typeName); + assertNotNull(typed, "Missing type: " + typeName); + return typed; + } + + @Test + public void testGCSFetcherConfig() throws Exception { + loadViaTikaLoader("gcs-fetcher.json"); + + JsonNode inner = innerComponent(readExample("gcs-fetcher.json"), + "fetchers", "gcsf", "gcs-fetcher"); + GCSFetcherConfig config = GCSFetcherConfig.load(inner.toString()); + assertEquals("my-gcp-project", config.getProjectId()); + assertEquals("my-tika-input", config.getBucket()); + } + + @Test + public void testGCSEmitterConfig() throws Exception { + loadViaTikaLoader("gcs-emitter.json"); + + JsonNode inner = innerComponent(readExample("gcs-emitter.json"), + "emitters", "gcse", "gcs-emitter"); + GCSEmitterConfig config = GCSEmitterConfig.load(inner.toString()); + assertEquals("my-gcp-project", config.projectId()); + assertEquals("my-tika-output", config.bucket()); + assertEquals("json", config.fileExtension()); + config.validate(); + assertEquals("results", config.getNormalizedPrefix()); + } + + @Test + public void testGCSIteratorConfig() throws Exception { + loadViaTikaLoader("gcs-pipes-iterator.json"); + + JsonNode inner = innerComponent(readExample("gcs-pipes-iterator.json"), + "pipes-iterator", null, "gcs-pipes-iterator"); + GCSPipesIteratorConfig config = GCSPipesIteratorConfig.load(inner.toString()); + assertEquals("my-gcp-project", config.getProjectId()); + assertEquals("my-tika-input", config.getBucket()); + assertEquals("gcsf", config.getFetcherId()); + assertEquals("gcse", config.getEmitterId()); + } + + @Test + public void testGCSPipelineConfig() throws Exception { + loadViaTikaLoader("gcs-pipeline.json"); + + String json = readExample("gcs-pipeline.json"); + GCSFetcherConfig fetcher = GCSFetcherConfig.load( + innerComponent(json, "fetchers", "gcsf", "gcs-fetcher").toString()); + GCSEmitterConfig emitter = GCSEmitterConfig.load( + innerComponent(json, "emitters", "gcse", "gcs-emitter").toString()); + GCSPipesIteratorConfig iterator = GCSPipesIteratorConfig.load( + innerComponent(json, "pipes-iterator", null, "gcs-pipes-iterator").toString()); 
+ + emitter.validate(); + assertEquals(fetcher.getBucket(), iterator.getBucket()); + assertEquals("gcsf", iterator.getFetcherId()); + assertEquals("gcse", iterator.getEmitterId()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-emitter.json new file mode 100644 index 00000000000..6ba06037924 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-emitter.json @@ -0,0 +1,12 @@ +{ + "emitters": { + "gcse": { + "gcs-emitter": { + "projectId": "my-gcp-project", + "bucket": "my-tika-output", + "prefix": "results/", + "fileExtension": "json" + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-fetcher.json new file mode 100644 index 00000000000..89ab85eed3b --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-fetcher.json @@ -0,0 +1,12 @@ +{ + "fetchers": { + "gcsf": { + "gcs-fetcher": { + "projectId": "my-gcp-project", + "bucket": "my-tika-input", + "extractUserMetadata": true, + "spoolToTemp": true + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipeline.json new file mode 100644 index 00000000000..8c483e51049 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipeline.json @@ -0,0 +1,42 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "gcsf": { + "gcs-fetcher": { + "projectId": "my-gcp-project", + "bucket": "my-tika-input", + "extractUserMetadata": true + } + } + }, + "emitters": { + "gcse": { + "gcs-emitter": { + "projectId": "my-gcp-project", + "bucket": "my-tika-output", + "prefix": "results/", + "fileExtension": "json" + } + } + }, + "pipes-iterator": { + "gcs-pipes-iterator": { + "projectId": "my-gcp-project", + "bucket": "my-tika-input", + "prefix": "incoming/", + "fetcherId": "gcsf", + "emitterId": "gcse" + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipes-iterator.json new file mode 100644 index 00000000000..756e087848b --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipes-iterator.json @@ -0,0 +1,11 @@ +{ + "pipes-iterator": { + "gcs-pipes-iterator": { + "projectId": "my-gcp-project", + "bucket": "my-tika-input", + "prefix": "incoming/", + "fetcherId": "gcsf", + "emitterId": "gcse" + } + } +} From 06270d3ae2d0b2f31e173f3fcbdb5376497d9432 Mon Sep 17 00:00:00 2001 From: tallison Date: Mon, 11 May 2026 16:17:42 -0400 Subject: [PATCH 6/7] azblob --- .../ROOT/examples/pipes-azblob-emitter.json | 1 + .../ROOT/examples/pipes-azblob-fetcher.json | 1 + .../ROOT/examples/pipes-azblob-iterator.json | 1 + .../ROOT/examples/pipes-azblob-pipeline.json | 1 + docs/modules/ROOT/nav.adoc | 1 + .../ROOT/pages/pipes/plugins/azblob.adoc | 185 
++++++++++++++++++ .../tika/pipes/azblob/ConfigExamplesTest.java | 134 +++++++++++++ .../config-examples/az-blob-emitter.json | 14 ++ .../config-examples/az-blob-fetcher.json | 13 ++ .../config-examples/az-blob-pipeline.json | 45 +++++ .../az-blob-pipes-iterator.json | 13 ++ 11 files changed, 409 insertions(+) create mode 120000 docs/modules/ROOT/examples/pipes-azblob-emitter.json create mode 120000 docs/modules/ROOT/examples/pipes-azblob-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-azblob-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-azblob-pipeline.json create mode 100644 docs/modules/ROOT/pages/pipes/plugins/azblob.adoc create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-fetcher.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipeline.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipes-iterator.json diff --git a/docs/modules/ROOT/examples/pipes-azblob-emitter.json b/docs/modules/ROOT/examples/pipes-azblob-emitter.json new file mode 120000 index 00000000000..8213f434fa9 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-azblob-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-azblob-fetcher.json b/docs/modules/ROOT/examples/pipes-azblob-fetcher.json new file mode 120000 index 00000000000..c7d8ce2d52a --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-azblob-fetcher.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-azblob-iterator.json b/docs/modules/ROOT/examples/pipes-azblob-iterator.json new file mode 120000 index 00000000000..bc68d45fb08 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-azblob-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-azblob-pipeline.json b/docs/modules/ROOT/examples/pipes-azblob-pipeline.json new file mode 120000 index 00000000000..1e3c9dc8602 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-azblob-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index e5e2a096244..e72c1d637bb 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -35,6 +35,7 @@ *** xref:pipes/plugins/filesystem.adoc[File System] *** xref:pipes/plugins/s3.adoc[Amazon S3] *** xref:pipes/plugins/gcs.adoc[Google Cloud Storage] +*** xref:pipes/plugins/azblob.adoc[Azure Blob Storage] * xref:configuration/index.adoc[Configuration] ** xref:configuration/parsers/pdf-parser.adoc[PDF Parser] ** xref:configuration/parsers/tesseract-ocr-parser.adoc[Tesseract OCR] diff --git 
a/docs/modules/ROOT/pages/pipes/plugins/azblob.adoc b/docs/modules/ROOT/pages/pipes/plugins/azblob.adoc new file mode 100644 index 00000000000..1e462b0f705 --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/azblob.adoc @@ -0,0 +1,185 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += Azure Blob Storage Plugin +:toc: +:toclevels: 3 + +The Azure Blob Storage plugin (`tika-pipes-az-blob`) provides fetcher, emitter, and iterator interfaces for blobs in Azure Storage containers. + +[cols="2,1,3"] +|=== +|Interface |Component name |Class + +|Fetcher +|`az-blob-fetcher` +|`AZBlobFetcher` + +|Emitter +|`az-blob-emitter` +|`AZBlobEmitter` + +|Iterator +|`az-blob-pipes-iterator` +|`AZBlobPipesIterator` +|=== + +[#credentials] +== Credentials + +All three components authenticate with a SAS (shared-access-signature) token. There are no other auth modes — managed identity, account keys, and AD-based auth are not currently exposed. + +* `endpoint` — base URL of the storage account, e.g., `https://myaccount.blob.core.windows.net`. +* `sasToken` — the URL query-string portion of a generated SAS, without a leading `?`. Permissions in the token must match the operations the component will perform (read for fetchers/iterators, read+write for emitters). + +The emitter's `validate()` enforces that `sasToken`, `endpoint`, and `container` are all non-blank, but does not parse the SAS itself — invalid or expired tokens fail later when the Azure SDK makes a request. + +[#az-blob-fetcher] +== Azure Blob Fetcher (`az-blob-fetcher`) + +Reads blobs from an Azure Storage container. The fetch key is the blob name. + +[source,json] +---- +include::example$pipes-azblob-fetcher.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`endpoint` +|_required_ +|Storage account URL. + +|`container` +|_required_ +|Container name. + +|`sasToken` +|_required_ +|SAS token granting read access to the container. + +|`spoolToTemp` +|`true` +|If `true`, the fetched blob is spooled to a temp file before parsing. + +|`extractUserMetadata` +|`true` +|If `true`, blob user-metadata is copied into the parsed `Metadata`. +|=== + +[#az-blob-emitter] +== Azure Blob Emitter (`az-blob-emitter`) + +Writes parsed results to an Azure Storage container. The emit key (relative to `prefix`) is derived from the `FetchEmitTuple`. + +[source,json] +---- +include::example$pipes-azblob-emitter.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`endpoint` +|_required_ +|Storage account URL (validated non-blank). + +|`container` +|_required_ +|Destination container name (validated non-blank). + +|`sasToken` +|_required_ +|SAS token granting read+write access (validated non-blank). 
+ +|`prefix` +|_no default_ +|Optional blob-name prefix. A trailing `/` is stripped automatically. + +|`fileExtension` +|`json` +|Extension appended to each emitted blob name. + +|`overwriteExisting` +|`false` +|If `true`, an existing blob with the same name is overwritten; otherwise the emit fails. +|=== + +[#az-blob-iterator] +== Azure Blob Iterator (`az-blob-pipes-iterator`) + +Lists blobs under a container/prefix and emits one `FetchEmitTuple` per blob. + +[source,json] +---- +include::example$pipes-azblob-iterator.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`endpoint` +|_required_ +|Storage account URL. + +|`container` +|_required_ +|Container to enumerate. + +|`sasToken` +|_required_ +|SAS token granting list+read access. + +|`prefix` +|`""` +|Blob-name prefix to scope the listing. + +|`timeoutMillis` +|`360000` +|Per-request timeout, in milliseconds (6 minutes by default). + +|`fetcherId` / `emitterId` +|_required_ +|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract. +|=== + +[#az-blob-pipeline] +== Complete Pipeline Example + +The example below wires the Azure Blob fetcher, emitter, and iterator together into a container-to-container pipeline. + +[source,json] +---- +include::example$pipes-azblob-pipeline.json[] +---- + +[#notes] +== Notes + +* SAS tokens have an expiration baked in. For long-running pipelines, rotate the SAS or use a token that outlives the pipeline window. +* Avoid checking real SAS tokens into source control — the strings in the examples above are placeholders. +* Each component creates its own `BlobServiceClient`. The Azure SDK pools HTTP connections per client. diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java new file mode 100644 index 00000000000..0a083f608a8 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/java/org/apache/tika/pipes/azblob/ConfigExamplesTest.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.azblob; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.emitter.azblob.AZBlobEmitterConfig; +import org.apache.tika.pipes.fetcher.azblob.config.AZBlobFetcherConfig; +import org.apache.tika.pipes.iterator.azblob.AZBlobPipesIteratorConfig; + +/** + * Validates Azure Blob fetcher/emitter/iterator configuration examples used in documentation. + *
+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/} + * and are included directly in the AsciiDoc documentation via the {@code include::} directive. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + private void loadViaTikaLoader(String resourceName) throws Exception { + String json = readExample(resourceName); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + TikaLoader loader = TikaLoader.load(configFile); + assertNotNull(loader, "TikaLoader should not be null for: " + resourceName); + } + + private JsonNode innerComponent(String json, String section, String id, String typeName) + throws Exception { + JsonNode root = OBJECT_MAPPER.readTree(json); + JsonNode sectionNode = root.get(section); + assertNotNull(sectionNode, "Missing section: " + section); + JsonNode idNode = id == null ? sectionNode : sectionNode.get(id); + assertNotNull(idNode, "Missing id: " + id); + JsonNode typed = idNode.get(typeName); + assertNotNull(typed, "Missing type: " + typeName); + return typed; + } + + @Test + public void testAZBlobFetcherConfig() throws Exception { + loadViaTikaLoader("az-blob-fetcher.json"); + + JsonNode inner = innerComponent(readExample("az-blob-fetcher.json"), + "fetchers", "azf", "az-blob-fetcher"); + AZBlobFetcherConfig config = AZBlobFetcherConfig.load(inner.toString()); + assertEquals("tika-input", config.getContainer()); + assertEquals("https://myaccount.blob.core.windows.net", config.getEndpoint()); + assertNotNull(config.getSasToken()); + } + + @Test + public void testAZBlobEmitterConfig() throws Exception { + loadViaTikaLoader("az-blob-emitter.json"); + + JsonNode inner = innerComponent(readExample("az-blob-emitter.json"), + "emitters", "aze", "az-blob-emitter"); + AZBlobEmitterConfig config = AZBlobEmitterConfig.load(inner.toString()); + assertEquals("tika-output", config.container()); + assertEquals("json", config.fileExtension()); + config.validate(); + assertEquals("results", config.getNormalizedPrefix()); + } + + @Test + public void testAZBlobIteratorConfig() throws Exception { + loadViaTikaLoader("az-blob-pipes-iterator.json"); + + JsonNode inner = innerComponent(readExample("az-blob-pipes-iterator.json"), + "pipes-iterator", null, "az-blob-pipes-iterator"); + AZBlobPipesIteratorConfig config = AZBlobPipesIteratorConfig.load(inner.toString()); + assertEquals("tika-input", config.getContainer()); + assertEquals("incoming/", config.getPrefix()); + assertEquals(360000L, config.getTimeoutMillis()); + assertEquals("azf", config.getFetcherId()); + assertEquals("aze", config.getEmitterId()); + } + + @Test + public void testAZBlobPipelineConfig() throws Exception { + loadViaTikaLoader("az-blob-pipeline.json"); + + String json = readExample("az-blob-pipeline.json"); + AZBlobFetcherConfig fetcher = AZBlobFetcherConfig.load( + innerComponent(json, "fetchers", "azf", "az-blob-fetcher").toString()); + AZBlobEmitterConfig emitter = AZBlobEmitterConfig.load( + innerComponent(json, "emitters", "aze", "az-blob-emitter").toString()); + 
AZBlobPipesIteratorConfig iterator = AZBlobPipesIteratorConfig.load( + innerComponent(json, "pipes-iterator", null, "az-blob-pipes-iterator").toString()); + + emitter.validate(); + assertEquals(fetcher.getContainer(), iterator.getContainer()); + assertEquals("azf", iterator.getFetcherId()); + assertEquals("aze", iterator.getEmitterId()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-emitter.json new file mode 100644 index 00000000000..9d102868c9f --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-emitter.json @@ -0,0 +1,14 @@ +{ + "emitters": { + "aze": { + "az-blob-emitter": { + "endpoint": "https://myaccount.blob.core.windows.net", + "container": "tika-output", + "sasToken": "sv=2024-11-04&ss=b&srt=sco&sp=rwl&se=2030-01-01T00:00:00Z&st=2024-01-01T00:00:00Z&spr=https&sig=REDACTED", + "prefix": "results/", + "fileExtension": "json", + "overwriteExisting": false + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-fetcher.json new file mode 100644 index 00000000000..aebdcedf93f --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-fetcher.json @@ -0,0 +1,13 @@ +{ + "fetchers": { + "azf": { + "az-blob-fetcher": { + "endpoint": "https://myaccount.blob.core.windows.net", + "container": "tika-input", + "sasToken": "sv=2024-11-04&ss=b&srt=sco&sp=rl&se=2030-01-01T00:00:00Z&st=2024-01-01T00:00:00Z&spr=https&sig=REDACTED", + "extractUserMetadata": true, + "spoolToTemp": true + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipeline.json new file mode 100644 index 00000000000..65181a1a57d --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipeline.json @@ -0,0 +1,45 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "azf": { + "az-blob-fetcher": { + "endpoint": "https://myaccount.blob.core.windows.net", + "container": "tika-input", + "sasToken": "sv=2024-11-04&ss=b&srt=sco&sp=rl&se=2030-01-01T00:00:00Z&st=2024-01-01T00:00:00Z&spr=https&sig=REDACTED", + "extractUserMetadata": true + } + } + }, + "emitters": { + "aze": { + "az-blob-emitter": { + "endpoint": "https://myaccount.blob.core.windows.net", + "container": "tika-output", + "sasToken": "sv=2024-11-04&ss=b&srt=sco&sp=rwl&se=2030-01-01T00:00:00Z&st=2024-01-01T00:00:00Z&spr=https&sig=REDACTED", + "prefix": "results/", + "fileExtension": "json" + } + } + }, + "pipes-iterator": { + "az-blob-pipes-iterator": { + "endpoint": "https://myaccount.blob.core.windows.net", + "container": "tika-input", + "sasToken": "sv=2024-11-04&ss=b&srt=sco&sp=rl&se=2030-01-01T00:00:00Z&st=2024-01-01T00:00:00Z&spr=https&sig=REDACTED", + "prefix": "incoming/", + "fetcherId": "azf", + "emitterId": "aze" + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git 
a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipes-iterator.json new file mode 100644 index 00000000000..e2875fa92fb --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipes-iterator.json @@ -0,0 +1,13 @@ +{ + "pipes-iterator": { + "az-blob-pipes-iterator": { + "endpoint": "https://myaccount.blob.core.windows.net", + "container": "tika-input", + "sasToken": "sv=2024-11-04&ss=b&srt=sco&sp=rl&se=2030-01-01T00:00:00Z&st=2024-01-01T00:00:00Z&spr=https&sig=REDACTED", + "prefix": "incoming/", + "timeoutMillis": 360000, + "fetcherId": "azf", + "emitterId": "aze" + } + } +} From e8b98f8e4951c96e8ed41bb0b8f15c89dc98132a Mon Sep 17 00:00:00 2001 From: tallison Date: Mon, 11 May 2026 21:00:14 -0400 Subject: [PATCH 7/7] pipes docs updates --- .../examples/pipes-atlassian-jwt-fetcher.json | 1 + .../ROOT/examples/pipes-config-template.json | 1 + .../ROOT/examples/pipes-csv-iterator.json | 1 + .../examples/pipes-elasticsearch-emitter.json | 1 + .../pipes-elasticsearch-pipeline.json | 1 + .../pipes-elasticsearch-reporter.json | 1 + .../modules/ROOT/examples/pipes-emit-all.json | 1 + .../ROOT/examples/pipes-fs-emitter.json | 1 - .../ROOT/examples/pipes-fs-fetcher.json | 1 - .../examples/pipes-google-drive-fetcher.json | 1 + .../ROOT/examples/pipes-http-fetcher.json | 1 + .../ROOT/examples/pipes-jdbc-emitter.json | 1 + .../ROOT/examples/pipes-jdbc-iterator.json | 1 + .../ROOT/examples/pipes-jdbc-pipeline.json | 1 + .../ROOT/examples/pipes-jdbc-reporter.json | 1 + .../ROOT/examples/pipes-json-iterator.json | 1 + .../ROOT/examples/pipes-kafka-emitter.json | 1 + .../ROOT/examples/pipes-kafka-iterator.json | 1 + .../ROOT/examples/pipes-kafka-pipeline.json | 1 + .../pipes-microsoft-graph-fetcher.json | 1 + .../examples/pipes-opensearch-emitter.json | 1 + .../examples/pipes-opensearch-pipeline.json | 1 + .../examples/pipes-opensearch-reporter.json | 1 + .../ROOT/examples/pipes-shared-server.json | 1 + .../ROOT/examples/pipes-solr-emitter-zk.json | 1 + .../ROOT/examples/pipes-solr-emitter.json | 1 + .../ROOT/examples/pipes-solr-iterator.json | 1 + .../ROOT/examples/pipes-solr-pipeline.json | 1 + docs/modules/ROOT/nav.adoc | 11 + .../ROOT/pages/pipes/configuration.adoc | 51 ++++ docs/modules/ROOT/pages/pipes/cpu-sizing.adoc | 17 ++ docs/modules/ROOT/pages/pipes/emitters.adoc | 245 +++++----------- docs/modules/ROOT/pages/pipes/fetchers.adoc | 264 ++++-------------- docs/modules/ROOT/pages/pipes/iterators.adoc | 230 ++++----------- .../pages/pipes/plugins/atlassian-jwt.adoc | 121 ++++++++ .../modules/ROOT/pages/pipes/plugins/csv.adoc | 75 +++++ .../pages/pipes/plugins/elasticsearch.adoc | 196 +++++++++++++ .../pages/pipes/plugins/google-drive.adoc | 79 ++++++ .../ROOT/pages/pipes/plugins/http.adoc | 132 +++++++++ .../ROOT/pages/pipes/plugins/jdbc.adoc | 241 ++++++++++++++++ .../ROOT/pages/pipes/plugins/json.adoc | 63 +++++ .../ROOT/pages/pipes/plugins/kafka.adoc | 213 ++++++++++++++ .../pages/pipes/plugins/microsoft-graph.adoc | 85 ++++++ .../ROOT/pages/pipes/plugins/opensearch.adoc | 176 ++++++++++++ .../ROOT/pages/pipes/plugins/solr.adoc | 202 ++++++++++++++ docs/modules/ROOT/pages/pipes/reporters.adoc | 99 +++---- .../ROOT/pages/pipes/shared-server-mode.adoc | 2 + .../atlassianjwt/ConfigExamplesTest.java | 69 +++++ .../atlassian-jwt-fetcher.json | 19 ++ 
.../config/tika-config-az-blob-fetcher.xml | 30 -- .../resources/config/tika-config-az-blob.xml | 28 -- .../tika/pipes/csv/ConfigExamplesTest.java | 70 +++++ .../config-examples/csv-pipes-iterator.json | 12 + .../tika/pipes/es/ConfigExamplesTest.java | 126 +++++++++ .../resources/config-examples/es-emitter.json | 19 ++ .../config-examples/es-pipeline.json | 60 ++++ .../config-examples/es-reporter.json | 15 + .../test/resources/config/tika-config-gcs.xml | 26 -- .../pipes/googledrive/ConfigExamplesTest.java | 70 +++++ .../config-examples/google-drive-fetcher.json | 13 + .../tika/pipes/http/ConfigExamplesTest.java | 70 +++++ .../config-examples/http-fetcher.json | 21 ++ .../jdbc/JDBCPipesReporterConfig.java | 27 +- .../tika/pipes/jdbc/ConfigExamplesTest.java | 150 ++++++++++ .../config-examples/jdbc-emitter.json | 22 ++ .../config-examples/jdbc-pipeline.json | 56 ++++ .../config-examples/jdbc-pipes-iterator.json | 15 + .../config-examples/jdbc-reporter.json | 12 + .../tika-config-jdbc-emitter-attachments.xml | 53 ---- ...ika-config-jdbc-emitter-existing-table.xml | 42 --- .../tika-config-jdbc-emitter-multivalued.xml | 45 --- .../tika-config-jdbc-emitter-trunc.xml | 44 --- .../configs/tika-config-jdbc-emitter.xml | 54 ---- .../tika/pipes/json/ConfigExamplesTest.java | 67 +++++ .../config-examples/json-pipes-iterator.json | 9 + .../tika/pipes/kafka/ConfigExamplesTest.java | 119 ++++++++ .../config-examples/kafka-emitter.json | 19 ++ .../config-examples/kafka-pipeline.json | 43 +++ .../config-examples/kafka-pipes-iterator.json | 14 + .../microsoftgraph/ConfigExamplesTest.java | 72 +++++ .../microsoft-graph-fetcher.json | 15 + .../pipes/opensearch/ConfigExamplesTest.java | 123 ++++++++ .../config-examples/opensearch-emitter.json | 21 ++ .../config-examples/opensearch-pipeline.json | 64 +++++ .../config-examples/opensearch-reporter.json | 17 ++ .../resources/tika-config-simple-emitter.xml | 41 --- .../tika/pipes/solr/ConfigExamplesTest.java | 134 +++++++++ .../config-examples/solr-emitter-zk.json | 15 + .../config-examples/solr-emitter.json | 17 ++ .../config-examples/solr-pipeline.json | 42 +++ .../config-examples/solr-pipes-iterator.json | 15 + .../resources/tika-config-simple-emitter.xml | 48 ---- 92 files changed, 3560 insertions(+), 1033 deletions(-) create mode 120000 docs/modules/ROOT/examples/pipes-atlassian-jwt-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-config-template.json create mode 120000 docs/modules/ROOT/examples/pipes-csv-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-elasticsearch-emitter.json create mode 120000 docs/modules/ROOT/examples/pipes-elasticsearch-pipeline.json create mode 120000 docs/modules/ROOT/examples/pipes-elasticsearch-reporter.json create mode 120000 docs/modules/ROOT/examples/pipes-emit-all.json delete mode 120000 docs/modules/ROOT/examples/pipes-fs-emitter.json delete mode 120000 docs/modules/ROOT/examples/pipes-fs-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-google-drive-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-http-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-jdbc-emitter.json create mode 120000 docs/modules/ROOT/examples/pipes-jdbc-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-jdbc-pipeline.json create mode 120000 docs/modules/ROOT/examples/pipes-jdbc-reporter.json create mode 120000 docs/modules/ROOT/examples/pipes-json-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-kafka-emitter.json create mode 120000 
docs/modules/ROOT/examples/pipes-kafka-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-kafka-pipeline.json create mode 120000 docs/modules/ROOT/examples/pipes-microsoft-graph-fetcher.json create mode 120000 docs/modules/ROOT/examples/pipes-opensearch-emitter.json create mode 120000 docs/modules/ROOT/examples/pipes-opensearch-pipeline.json create mode 120000 docs/modules/ROOT/examples/pipes-opensearch-reporter.json create mode 120000 docs/modules/ROOT/examples/pipes-shared-server.json create mode 120000 docs/modules/ROOT/examples/pipes-solr-emitter-zk.json create mode 120000 docs/modules/ROOT/examples/pipes-solr-emitter.json create mode 120000 docs/modules/ROOT/examples/pipes-solr-iterator.json create mode 120000 docs/modules/ROOT/examples/pipes-solr-pipeline.json create mode 100644 docs/modules/ROOT/pages/pipes/plugins/atlassian-jwt.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/csv.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/elasticsearch.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/google-drive.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/http.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/jdbc.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/json.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/kafka.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/microsoft-graph.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/opensearch.adoc create mode 100644 docs/modules/ROOT/pages/pipes/plugins/solr.adoc create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/resources/config-examples/atlassian-jwt-fetcher.json delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob-fetcher.xml delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob.xml create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/resources/config-examples/csv-pipes-iterator.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-pipeline.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-reporter.json delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config/tika-config-gcs.xml create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/resources/config-examples/google-drive-fetcher.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/resources/config-examples/http-fetcher.json create mode 100644 
tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipeline.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipes-iterator.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-reporter.json delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-trunc.xml delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/resources/config-examples/json-pipes-iterator.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipeline.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipes-iterator.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/resources/config-examples/microsoft-graph-fetcher.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-pipeline.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-reporter.json delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/tika-config-simple-emitter.xml create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter-zk.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter.json create mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipeline.json create mode 100644 
tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipes-iterator.json delete mode 100644 tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/tika-config-simple-emitter.xml diff --git a/docs/modules/ROOT/examples/pipes-atlassian-jwt-fetcher.json b/docs/modules/ROOT/examples/pipes-atlassian-jwt-fetcher.json new file mode 120000 index 00000000000..8f2871640e4 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-atlassian-jwt-fetcher.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/resources/config-examples/atlassian-jwt-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-config-template.json b/docs/modules/ROOT/examples/pipes-config-template.json new file mode 120000 index 00000000000..ae8c7de24eb --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-config-template.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-async-cli/src/main/resources/config-template.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-csv-iterator.json b/docs/modules/ROOT/examples/pipes-csv-iterator.json new file mode 120000 index 00000000000..11bdc189cb4 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-csv-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/resources/config-examples/csv-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-elasticsearch-emitter.json b/docs/modules/ROOT/examples/pipes-elasticsearch-emitter.json new file mode 120000 index 00000000000..2b48ca80802 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-elasticsearch-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-elasticsearch-pipeline.json b/docs/modules/ROOT/examples/pipes-elasticsearch-pipeline.json new file mode 120000 index 00000000000..2fefff94957 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-elasticsearch-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-elasticsearch-reporter.json b/docs/modules/ROOT/examples/pipes-elasticsearch-reporter.json new file mode 120000 index 00000000000..36117c95a8a --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-elasticsearch-reporter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-reporter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-emit-all.json b/docs/modules/ROOT/examples/pipes-emit-all.json new file mode 120000 index 00000000000..5a5ba03d860 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-emit-all.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-emit-all.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-fs-emitter.json b/docs/modules/ROOT/examples/pipes-fs-emitter.json deleted file mode 120000 index a9321db9ebd..00000000000 --- a/docs/modules/ROOT/examples/pipes-fs-emitter.json +++ /dev/null @@ -1 +0,0 @@ -../../../../tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-fs-fetcher.json 
b/docs/modules/ROOT/examples/pipes-fs-fetcher.json deleted file mode 120000 index faef8e27a1e..00000000000 --- a/docs/modules/ROOT/examples/pipes-fs-fetcher.json +++ /dev/null @@ -1 +0,0 @@ -../../../../tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-google-drive-fetcher.json b/docs/modules/ROOT/examples/pipes-google-drive-fetcher.json new file mode 120000 index 00000000000..d8afdd781de --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-google-drive-fetcher.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/resources/config-examples/google-drive-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-http-fetcher.json b/docs/modules/ROOT/examples/pipes-http-fetcher.json new file mode 120000 index 00000000000..51a6d0387e5 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-http-fetcher.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/resources/config-examples/http-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-jdbc-emitter.json b/docs/modules/ROOT/examples/pipes-jdbc-emitter.json new file mode 120000 index 00000000000..878458e60f2 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-jdbc-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-jdbc-iterator.json b/docs/modules/ROOT/examples/pipes-jdbc-iterator.json new file mode 120000 index 00000000000..74eddd76010 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-jdbc-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-jdbc-pipeline.json b/docs/modules/ROOT/examples/pipes-jdbc-pipeline.json new file mode 120000 index 00000000000..e3ae2bc1d7a --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-jdbc-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-jdbc-reporter.json b/docs/modules/ROOT/examples/pipes-jdbc-reporter.json new file mode 120000 index 00000000000..53d25aa2b6a --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-jdbc-reporter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-reporter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-json-iterator.json b/docs/modules/ROOT/examples/pipes-json-iterator.json new file mode 120000 index 00000000000..ef848ecda3f --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-json-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/resources/config-examples/json-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-kafka-emitter.json b/docs/modules/ROOT/examples/pipes-kafka-emitter.json new file mode 120000 index 00000000000..b3a84ca8c42 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-kafka-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-emitter.json \ No newline 
at end of file diff --git a/docs/modules/ROOT/examples/pipes-kafka-iterator.json b/docs/modules/ROOT/examples/pipes-kafka-iterator.json new file mode 120000 index 00000000000..6a35e7d2b72 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-kafka-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-kafka-pipeline.json b/docs/modules/ROOT/examples/pipes-kafka-pipeline.json new file mode 120000 index 00000000000..cdbbc2c980e --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-kafka-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-microsoft-graph-fetcher.json b/docs/modules/ROOT/examples/pipes-microsoft-graph-fetcher.json new file mode 120000 index 00000000000..a69990c9873 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-microsoft-graph-fetcher.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/resources/config-examples/microsoft-graph-fetcher.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-opensearch-emitter.json b/docs/modules/ROOT/examples/pipes-opensearch-emitter.json new file mode 120000 index 00000000000..6cf72fc6104 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-opensearch-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-opensearch-pipeline.json b/docs/modules/ROOT/examples/pipes-opensearch-pipeline.json new file mode 120000 index 00000000000..ba12f014085 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-opensearch-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-opensearch-reporter.json b/docs/modules/ROOT/examples/pipes-opensearch-reporter.json new file mode 120000 index 00000000000..22bd5cbc926 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-opensearch-reporter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-reporter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-shared-server.json b/docs/modules/ROOT/examples/pipes-shared-server.json new file mode 120000 index 00000000000..e6d0a634c49 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-shared-server.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-shared-server.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-solr-emitter-zk.json b/docs/modules/ROOT/examples/pipes-solr-emitter-zk.json new file mode 120000 index 00000000000..2af060b6063 --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-solr-emitter-zk.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter-zk.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-solr-emitter.json b/docs/modules/ROOT/examples/pipes-solr-emitter.json new file mode 120000 index 00000000000..80aa2572035 --- /dev/null +++ 
b/docs/modules/ROOT/examples/pipes-solr-emitter.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-solr-iterator.json b/docs/modules/ROOT/examples/pipes-solr-iterator.json new file mode 120000 index 00000000000..e2b7beabd5f --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-solr-iterator.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipes-iterator.json \ No newline at end of file diff --git a/docs/modules/ROOT/examples/pipes-solr-pipeline.json b/docs/modules/ROOT/examples/pipes-solr-pipeline.json new file mode 120000 index 00000000000..480ab0bf79a --- /dev/null +++ b/docs/modules/ROOT/examples/pipes-solr-pipeline.json @@ -0,0 +1 @@ +../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipeline.json \ No newline at end of file diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index e72c1d637bb..9f4e70609de 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -36,6 +36,17 @@ *** xref:pipes/plugins/s3.adoc[Amazon S3] *** xref:pipes/plugins/gcs.adoc[Google Cloud Storage] *** xref:pipes/plugins/azblob.adoc[Azure Blob Storage] +*** xref:pipes/plugins/opensearch.adoc[OpenSearch] +*** xref:pipes/plugins/elasticsearch.adoc[Elasticsearch] +*** xref:pipes/plugins/solr.adoc[Apache Solr] +*** xref:pipes/plugins/jdbc.adoc[JDBC] +*** xref:pipes/plugins/kafka.adoc[Apache Kafka] +*** xref:pipes/plugins/http.adoc[HTTP] +*** xref:pipes/plugins/google-drive.adoc[Google Drive] +*** xref:pipes/plugins/microsoft-graph.adoc[Microsoft Graph] +*** xref:pipes/plugins/atlassian-jwt.adoc[Atlassian JWT] +*** xref:pipes/plugins/csv.adoc[CSV] +*** xref:pipes/plugins/json.adoc[JSON] * xref:configuration/index.adoc[Configuration] ** xref:configuration/parsers/pdf-parser.adoc[PDF Parser] ** xref:configuration/parsers/tesseract-ocr-parser.adoc[Tesseract OCR] diff --git a/docs/modules/ROOT/pages/pipes/configuration.adoc b/docs/modules/ROOT/pages/pipes/configuration.adoc index e9c75ab0603..f6b3d5c2b64 100644 --- a/docs/modules/ROOT/pages/pipes/configuration.adoc +++ b/docs/modules/ROOT/pages/pipes/configuration.adoc @@ -150,3 +150,54 @@ These settings control how parsed results are batched before sending to emitters |=== See xref:pipes/shared-server-mode.adoc[Shared Server Mode] for details. + +[#complete-examples] +== Complete examples + +Worked-out end-to-end configs from the test tree. Each is loaded by an automated test, so the syntax stays current. + +[#fs-pipeline] +=== Filesystem-to-filesystem pipeline + +[source,json,subs=none] +---- +include::example$pipes-fs-pipeline.json[] +---- + +icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json[View source on GitHub] + +Tokens (`FETCHER_BASE_PATH`, `EMITTER_BASE_PATH`, `PLUGINS_PATHS`) are substituted by the test harness — replace them with real paths in production configs. 
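+
+A filled-in sketch of the same shape, with illustrative paths and component IDs standing in for the harness tokens (the values below are placeholders, not taken from the test config):
+
+[source,json]
+----
+{
+  "fetchers": {
+    "fsf": {
+      "file-system-fetcher": { "basePath": "/data/input" }
+    }
+  },
+  "emitters": {
+    "fse": {
+      "file-system-emitter": { "basePath": "/data/output", "fileExtension": "json" }
+    }
+  },
+  "pipes-iterator": {
+    "file-system-pipes-iterator": {
+      "basePath": "/data/input",
+      "fetcherId": "fsf",
+      "emitterId": "fse"
+    }
+  }
+}
+----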
+ +[#emit-all] +=== Emit-all variant + +[source,json,subs=none] +---- +include::example$pipes-emit-all.json[] +---- + +icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-emit-all.json[View source on GitHub] + +[#shared-server-example] +=== Shared-server (YOLO) mode + +[source,json,subs=none] +---- +include::example$pipes-shared-server.json[] +---- + +icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-shared-server.json[View source on GitHub] + +See xref:pipes/shared-server-mode.adoc[Shared Server Mode] for the trade-offs. + +[#config-template] +=== `tika-async-cli` config template + +[source,json,subs=none] +---- +include::example$pipes-config-template.json[] +---- + +icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-async-cli/src/main/resources/config-template.json[View source on GitHub] + +For per-plugin pipeline examples (S3, OpenSearch, JDBC, Kafka, etc.), see the relevant page under xref:pipes/plugins/index.adoc[Plugins]. diff --git a/docs/modules/ROOT/pages/pipes/cpu-sizing.adoc b/docs/modules/ROOT/pages/pipes/cpu-sizing.adoc index e81a843853f..997ea159b18 100644 --- a/docs/modules/ROOT/pages/pipes/cpu-sizing.adoc +++ b/docs/modules/ROOT/pages/pipes/cpu-sizing.adoc @@ -129,6 +129,23 @@ When Tika sees an explicit `-XX:ActiveProcessorCount` in `forkedJvmArgs`, it respects your value and skips the auto-injection — the sizing summary will report `autoCap=user-set in forkedJvmArgs`. +[#heap-per-worker] +== Heap per worker — rule of thumb + +A reasonable starting point is **~2 GB of heap per forked worker** (passed via `-Xmx2g` in `forkedJvmArgs`). The number falls out of three independent constraints any of which can dominate: + +* **Worst-case PDF parsing.** A handful of pathological PDFs in any reasonably large corpus will allocate hundreds of MB of intermediate object data per document — large image streams, deeply nested form fields, big embedded fonts. Smaller heaps OOM on those documents; larger heaps just let GC clean up between docs. +* **Embedded-document explosion.** A zip-bomb-shaped office document with thousands of embedded objects multiplies per-doc allocation by the embedding count. The `maxEmbeddedResources` setting caps the count, but each retained object still lives in the heap until the whole tree finishes parsing. +* **GC headroom.** G1GC behaves poorly above ~85% occupancy. A `-Xmx2g` worker comfortably handles documents that allocate up to ~1.5 GB of live data; below that you start trading throughput for memory. + +This is a default — not a tuning recommendation. To right-size for your specific corpus: + +. Measure peak per-worker live-heap with `-Xlog:gc*` (look at the post-GC working set, not the peak before GC). +. Pick `-Xmx` ≈ `1.5 × peakLiveHeap` to leave GC headroom. +. Re-measure under your real concurrency. Embedded-doc-heavy formats (PowerPoint, complex Word) shift this number up; flat text or PDF-text-only shifts it down. + +The pod-level heap budget is `numClients × per-worker-Xmx + parent-overhead`. On a 16 GB node running `numClients=4`, that's about `4 × 2 GB + 1 GB ≈ 9 GB` — comfortably below the node limit, leaving room for kernel, IO buffers, and a non-saturated pod. 
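+
+As a concrete sketch of that budget, the sizing above might be expressed in config like this. Whether `forkedJvmArgs` sits alongside `numClients` under `pipes` depends on your config layout, and the GC-log path is illustrative; treat this as a sketch, not a canonical template:
+
+[source,json]
+----
+{
+  "pipes": {
+    "numClients": 4,
+    "forkedJvmArgs": ["-Xmx2g", "-Xlog:gc*:file=/tmp/tika-worker-gc.log"]
+  }
+}
+----
+
+That is 4 workers × 2 GB of heap plus roughly 1 GB of parent overhead, the ≈ 9 GB figure computed above.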
+ == Container & cgroup behavior The formula uses `Runtime.availableProcessors()` for the host CPU count, diff --git a/docs/modules/ROOT/pages/pipes/emitters.adoc b/docs/modules/ROOT/pages/pipes/emitters.adoc index 3feeb8ebf3c..3fa494b4378 100644 --- a/docs/modules/ROOT/pages/pipes/emitters.adoc +++ b/docs/modules/ROOT/pages/pipes/emitters.adoc @@ -16,205 +16,94 @@ // = Emitters +:toc: -Emitters write parsed results to a destination. Each emitter is identified by -its component name and an `id` that is referenced by the pipes iterator. +An *emitter* writes parse results to a destination — a file on disk, a row in a database, a document in a search index, a message on a queue, etc. -== File System Emitter (`file-system-emitter`) +[#contract] +== The Emitter Contract -Writes parsed metadata as JSON files to a local or mounted filesystem. +Each emitter implements `Emitter#emit(EmitData emitData)`, where `EmitData` carries the emit key, the parsed `Metadata`, and (for content-emitting strategies) the extracted content. -**Module:** `tika-pipes-file-system` +The emit key is supplied by the iterator on each `FetchEmitTuple` and tells the emitter where to put the result. Its shape depends on the emitter: -[source,json,subs=none] ----- -include::example$pipes-fs-emitter.json[] ----- - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`basePath` -|_required_ -|Base output directory. - -|`fileExtension` -|`json` -|Extension for output files. - -|`onExists` -|`EXCEPTION` -|Behavior when output file exists: `SKIP`, `REPLACE`, `EXCEPTION`. - -|`prettyPrint` -|`false` -|Pretty-print JSON output. -|=== - -== Elasticsearch Emitter (`es-emitter`) - -Sends parsed documents to Elasticsearch via the `_bulk` API. Uses plain HTTP -- -no dependency on the ES Java client. - -**Module:** `tika-pipes-es` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`esUrl` -|_required_ -|Full URL including index (e.g., `https://localhost:9200/my-index`). - -|`idField` -|`_id` -|Metadata field used as the document `_id`. - -|`apiKey` -|_none_ -|Base64-encoded `id:api_key` for authentication. - -|`attachmentStrategy` -|`SEPARATE_DOCUMENTS` -|`SEPARATE_DOCUMENTS` or `PARENT_CHILD`. - -|`updateStrategy` -|`OVERWRITE` -|`OVERWRITE` (full replace) or `UPSERT` (field-level merge). - -|`embeddedFileFieldName` -|`embedded` -|Join-field name for `PARENT_CHILD` mode. -|=== - -== OpenSearch Emitter (`opensearch-emitter`) - -Sends documents to OpenSearch. Configured identically to the ES emitter -but uses `openSearchUrl` instead of `esUrl`. - -**Module:** `tika-pipes-opensearch` - -== S3 Emitter (`s3-emitter`) - -Writes parsed metadata as JSON objects to Amazon S3. - -**Module:** `tika-pipes-s3` +* file-system / S3 / GCS / Azure Blob — a key/path relative to `basePath` or `prefix`. +* OpenSearch / Elasticsearch / Solr — the `_id` field value, taken from the metadata field named by the emitter's `idField`. +* JDBC — the value bound to the first `?` placeholder in the `insert` template. +* Kafka — the Kafka record key. -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`bucket` -|_required_ -|S3 bucket name. - -|`region` -|_required_ -|AWS region. - -|`prefix` -|_none_ -|S3 key prefix for output objects. - -|`credentialsProvider` -|`profile` -|Credentials type: `profile`, `static`, `instance`. - -|`fileExtension` -|`json` -|File extension for output keys. -|=== - -== GCS Emitter (`gcs-emitter`) - -Writes parsed metadata to Google Cloud Storage. 
- -**Module:** `tika-pipes-gcs` - -== Azure Blob Emitter (`az-blob-emitter`) - -Writes parsed metadata to Azure Blob Storage. - -**Module:** `tika-pipes-az-blob` - -== Solr Emitter (`solr-emitter`) +Emitters are intended to be safe under concurrent use; the pipeline's worker pool may call `emit()` from many threads. -Indexes parsed documents into Apache Solr. +[#wiring] +== Wiring Emitters Into a Pipeline -**Module:** `tika-pipes-solr` +Emitters live under the top-level `emitters` key. Each emitter gets an ID (the outer map key) and a type-name (the inner map key); the iterator references the ID through its `emitterId` field. -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`solrCollection` -|_required_ -|Solr collection name. - -|`solrUrls` -|_required_ -|List of Solr URLs. +[source,json] +---- +{ + "emitters": { + "output": { + "file-system-emitter": { + "basePath": "/data/output", + "fileExtension": "json" + } + } + }, + "pipes-iterator": { + "file-system-pipes-iterator": { + "basePath": "/data/input", + "fetcherId": "...", + "emitterId": "output" + } + } +} +---- -|`idField` -|`id` -|Field name for document ID. +A pipeline may declare multiple emitters and choose between them at iterator-config time. Within a single iterator, each emitted `FetchEmitTuple` carries exactly one emitter ID. -|`commitWithin` -|`-1` -|Milliseconds before auto-commit (-1 = server default). +[#plugins] +== Available Emitters -|`attachmentStrategy` -|`SEPARATE_DOCUMENTS` -|How to handle embedded documents. +[cols="2,2,3"] |=== +|Plugin |Component name |Notes -== JDBC Emitter (`jdbc-emitter`) +|xref:pipes/plugins/filesystem.adoc[File System] +|`file-system-emitter` +|Local / mounted filesystem. -Writes parsed metadata to a SQL database via JDBC. +|xref:pipes/plugins/s3.adoc[Amazon S3] +|`s3-emitter` +|S3 or S3-compatible. -**Module:** `tika-pipes-jdbc` +|xref:pipes/plugins/gcs.adoc[Google Cloud Storage] +|`gcs-emitter` +|GCS via ADC. -[cols="1,1,3"] -|=== -|Field |Default |Description +|xref:pipes/plugins/azblob.adoc[Azure Blob Storage] +|`az-blob-emitter` +|SAS-token auth. -|`connection` -|_required_ -|JDBC connection string. +|xref:pipes/plugins/opensearch.adoc[OpenSearch] +|`opensearch-emitter` +|REST-based bulk indexing. -|`insert` -|_required_ -|SQL INSERT statement with `?` placeholders. +|xref:pipes/plugins/elasticsearch.adoc[Elasticsearch] +|`es-emitter` +|REST-based bulk indexing; ApiKey or basic auth. -|`keys` -|_required_ -|Ordered list of metadata keys to bind to placeholders. -|=== +|xref:pipes/plugins/solr.adoc[Apache Solr] +|`solr-emitter` +|SolrCloud (URLs or ZooKeeper). -== Kafka Emitter (`kafka-emitter`) +|xref:pipes/plugins/jdbc.adoc[JDBC] +|`jdbc-emitter` +|Any RDBMS with a JDBC driver. -Sends parsed metadata as messages to Apache Kafka. - -**Module:** `tika-pipes-kafka` - -[cols="1,1,3"] +|xref:pipes/plugins/kafka.adoc[Apache Kafka] +|`kafka-emitter` +|Standard Kafka producer. |=== -|Field |Default |Description - -|`topic` -|_required_ -|Kafka topic name. -|`bootstrapServers` -|_required_ -|Kafka broker addresses. - -|`acks` -|`all` -|Acknowledgment requirement. - -|`lingerMs` -|`0` -|Batch wait time in milliseconds. -|=== +For the full plugin / interface matrix, see xref:pipes/plugins/index.adoc[Plugins]. 
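+
+To illustrate the multiple-emitter wiring described above, here is a sketch that declares two file-system emitters and routes this pipeline's output to one of them (IDs and paths are placeholders):
+
+[source,json]
+----
+{
+  "emitters": {
+    "json-out": {
+      "file-system-emitter": { "basePath": "/data/output-json" }
+    },
+    "archive-out": {
+      "file-system-emitter": { "basePath": "/archive/output" }
+    }
+  },
+  "pipes-iterator": {
+    "file-system-pipes-iterator": {
+      "basePath": "/data/input",
+      "fetcherId": "...",
+      "emitterId": "json-out"
+    }
+  }
+}
+----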
diff --git a/docs/modules/ROOT/pages/pipes/fetchers.adoc b/docs/modules/ROOT/pages/pipes/fetchers.adoc index eff355d0dea..96beaf47495 100644 --- a/docs/modules/ROOT/pages/pipes/fetchers.adoc +++ b/docs/modules/ROOT/pages/pipes/fetchers.adoc @@ -16,230 +16,82 @@ // = Fetchers +:toc: -Fetchers retrieve document bytes from a source. Each fetcher is identified by -its component name and an `id` that is referenced by the pipes iterator. +A *fetcher* retrieves the bytes of a document from a source — a local filesystem, an S3 bucket, an HTTP URL, etc. — and returns them as an `InputStream` to the parser. -== File System Fetcher (`file-system-fetcher`) +[#contract] +== The Fetcher Contract -Reads files from a local or mounted filesystem. +Each fetcher implements `Fetcher#fetch(String fetchKey, Metadata metadata, ParseContext parseContext)` and returns an `InputStream` for the named document. The shape of the fetch key depends on the fetcher: for the file-system fetcher it is a path relative to `basePath`; for the S3 fetcher it is an object key relative to `prefix`; for the HTTP fetcher it is the URL itself. -**Module:** `tika-pipes-file-system` +Fetchers are stateless from the pipeline's perspective — every `fetch()` call resolves the key independently, so iterators are free to parallelize fetches. -[source,json,subs=none] ----- -include::example$pipes-fs-fetcher.json[] ----- - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`basePath` -|_required_ -|Base directory. Fetch keys are resolved relative to this path. - -|`extractFileSystemMetadata` -|`false` -|Extract file created/modified timestamps and size into metadata. - -|`allowAbsolutePaths` -|`false` -|Allow absolute fetch keys when `basePath` is not set. -|=== - -== S3 Fetcher (`s3-fetcher`) - -Fetches objects from Amazon S3. - -**Module:** `tika-pipes-s3` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`bucket` -|_required_ -|S3 bucket name. - -|`region` -|_required_ -|AWS region (e.g., `us-east-1`). - -|`credentialsProvider` -|`profile` -|Credentials type: `profile`, `static`, `instance`. - -|`profile` -|`default` -|AWS profile name (when using `profile` credentials). - -|`accessKey` / `secretKey` -|_none_ -|Static credentials (when using `static` credentials). - -|`prefix` -|_none_ -|S3 key prefix. - -|`spoolToTemp` -|`false` -|Spool object to a temp file before parsing. - -|`extractUserMetadata` -|`false` -|Extract S3 user metadata. - -|`maxLength` -|_unlimited_ -|Maximum object size to fetch. -|=== - -== HTTP Fetcher (`http-fetcher`) - -Fetches documents from HTTP/HTTPS URLs. - -**Module:** `tika-pipes-http` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`userName` -|_none_ -|Basic auth username. - -|`password` -|_none_ -|Basic auth password. - -|`connectTimeoutMillis` -|`30000` -|Connection timeout. - -|`socketTimeoutMillis` -|`120000` -|Socket read timeout. +[#wiring] +== Wiring Fetchers Into a Pipeline -|`maxConnections` -|`200` -|Maximum concurrent connections. +Fetchers live under the top-level `fetchers` key. Each fetcher gets an ID (the outer map key) and a type-name (the inner map key); the iterator then references the ID through its `fetcherId` field. -|`userAgent` -|_default_ -|HTTP User-Agent header. -|=== - -== GCS Fetcher (`gcs-fetcher`) - -Fetches objects from Google Cloud Storage. - -**Module:** `tika-pipes-gcs` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`projectId` -|_required_ -|GCP project ID. - -|`bucket` -|_required_ -|GCS bucket name. - -|`prefix` -|_none_ -|Key prefix. 
- -|`spoolToTemp` -|`false` -|Spool to temp file before parsing. - -|`extractUserMetadata` -|`false` -|Extract GCS user metadata. -|=== - -== Azure Blob Fetcher (`az-blob-fetcher`) - -Fetches blobs from Azure Blob Storage. - -**Module:** `tika-pipes-az-blob` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`sasToken` -|_required_ -|Shared Access Signature token. - -|`endpoint` -|_required_ -|Azure storage endpoint URL. - -|`container` -|_required_ -|Container name. - -|`prefix` -|_none_ -|Blob prefix. - -|`extractUserMetadata` -|`false` -|Extract Azure user metadata. -|=== - -== Google Drive Fetcher (`google-drive-fetcher`) +[source,json] +---- +{ + "fetchers": { + "primary": { + "file-system-fetcher": { + "basePath": "/data/input" + } + } + }, + "pipes-iterator": { + "file-system-pipes-iterator": { + "basePath": "/data/input", + "fetcherId": "primary", + "emitterId": "..." + } + } +} +---- -Fetches files from Google Drive via the Drive API. +A single pipes config may declare multiple fetchers with different IDs and use them in different iterators or pipelines. -**Module:** `tika-pipes-google-drive` +[#plugins] +== Available Fetchers -[cols="1,1,3"] +[cols="2,2,3"] |=== -|Field |Default |Description +|Plugin |Component name |Notes -|`serviceAccountCredentialsPath` -|_required_ -|Path to GCP service account JSON key file. - -|`impersonatedUser` -|_none_ -|User email to impersonate (for domain-wide delegation). -|=== +|xref:pipes/plugins/filesystem.adoc[File System] +|`file-system-fetcher` +|Local / mounted filesystem. -== Microsoft Graph Fetcher (`microsoft-graph-fetcher`) +|xref:pipes/plugins/s3.adoc[Amazon S3] +|`s3-fetcher` +|S3 or S3-compatible (MinIO, LocalStack). -Fetches files from Microsoft 365 (OneDrive, SharePoint) via the Graph API. +|xref:pipes/plugins/gcs.adoc[Google Cloud Storage] +|`gcs-fetcher` +|GCS via Application Default Credentials. -**Module:** `tika-pipes-microsoft-graph` +|xref:pipes/plugins/azblob.adoc[Azure Blob Storage] +|`az-blob-fetcher` +|SAS-token auth. -== Atlassian JWT Fetcher (`atlassian-jwt-fetcher`) +|xref:pipes/plugins/http.adoc[HTTP] +|`http-fetcher` +|HTTP(S) with basic / JWT auth. -Fetches content from Atlassian products using JWT authentication. +|xref:pipes/plugins/google-drive.adoc[Google Drive] +|`google-drive-fetcher` +|Drive API with service-account auth. -**Module:** `tika-pipes-atlassian-jwt` +|xref:pipes/plugins/microsoft-graph.adoc[Microsoft Graph] +|`microsoft-graph-fetcher` +|OneDrive / SharePoint via Graph. -[cols="1,1,3"] +|xref:pipes/plugins/atlassian-jwt.adoc[Atlassian JWT] +|`atlassian-jwt-fetcher` +|Atlassian Connect (Jira/Confluence Cloud). |=== -|Field |Default |Description -|`sharedSecret` -|_required_ -|JWT shared secret. - -|`issuer` -|_required_ -|JWT issuer / app key. - -|`connectTimeoutMillis` -|`30000` -|Connection timeout. - -|`socketTimeoutMillis` -|`120000` -|Socket read timeout. -|=== +For the full plugin / interface matrix, see xref:pipes/plugins/index.adoc[Plugins]. diff --git a/docs/modules/ROOT/pages/pipes/iterators.adoc b/docs/modules/ROOT/pages/pipes/iterators.adoc index dc433bb4928..a3e3bc7292b 100644 --- a/docs/modules/ROOT/pages/pipes/iterators.adoc +++ b/docs/modules/ROOT/pages/pipes/iterators.adoc @@ -16,197 +16,85 @@ // = Pipes Iterators +:toc: -Pipes iterators enumerate the documents to be processed. Each iterator -produces fetch/emit tuples that the pipeline consumes. +A *pipes iterator* enumerates the documents to be processed. 
It emits one `FetchEmitTuple` per document; the pipeline workers then call the bound fetcher (to get the bytes), the parser, and the bound emitter (to write the result). -All iterators share a `baseConfig` block that specifies which fetcher and emitter -to use: +[#contract] +== The Iterator Contract -[source,json] ----- -"baseConfig": { - "fetcherId": "my-fetcher-id", - "emitterId": "my-emitter-id" -} ----- - -== File System Iterator (`file-system-pipes-iterator`) - -Recursively walks a directory tree. - -**Module:** `tika-pipes-file-system` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`basePath` -|_required_ -|Directory to walk. - -|`countTotal` -|`false` -|Count total files before processing (enables progress reporting). - -|`baseConfig` -|_required_ -|Fetcher/emitter IDs. -|=== - -== S3 Iterator (`s3-pipes-iterator`) - -Lists objects in an S3 bucket. - -**Module:** `tika-pipes-s3` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`bucket` -|_required_ -|S3 bucket name. - -|`region` -|_required_ -|AWS region. - -|`prefix` -|_none_ -|Key prefix to filter objects. - -|`credentialsProvider` -|`profile` -|Credentials type. - -|`baseConfig` -|_required_ -|Fetcher/emitter IDs. -|=== - -== GCS Iterator (`gcs-pipes-iterator`) - -Lists objects in a Google Cloud Storage bucket. - -**Module:** `tika-pipes-gcs` - -== Azure Blob Iterator (`az-blob-pipes-iterator`) - -Lists blobs in an Azure Blob Storage container. +A `PipesIterator` produces a stream of `FetchEmitTuple` records. Each tuple carries: -**Module:** `tika-pipes-az-blob` +* the *fetch key* — passed to the fetcher to retrieve the document bytes +* the *emit key* — passed to the emitter to decide where to write results +* an optional `id` and arbitrary metadata fields -== CSV Iterator (`csv-pipes-iterator`) +The iterator runs on its own thread; the pipeline reads tuples as fast as the worker pool can keep up. -Reads rows from a CSV file to generate fetch/emit tuples. +[#wiring] +== Wiring an Iterator Into a Pipeline -**Module:** `tika-pipes-csv` +The iterator lives under the singular top-level `pipes-iterator` key. The inner map key is the iterator's component name. `fetcherId` and `emitterId` are *flat fields* on the iterator config — they are not wrapped in a `baseConfig` block. -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`csvPath` -|_required_ -|Path to the CSV file. - -|`fetchKeyColumn` -|_required_ -|Column name containing the fetch key (file path, S3 key, etc.). - -|`emitKeyColumn` -|_none_ -|Column name for the emit key. If omitted, uses the fetch key. - -|`baseConfig` -|_required_ -|Fetcher/emitter IDs. -|=== - -== JDBC Iterator (`jdbc-pipes-iterator`) - -Executes a SQL query and uses each row as a fetch/emit tuple. - -**Module:** `tika-pipes-jdbc` - -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`connection` -|_required_ -|JDBC connection string. - -|`select` -|_required_ -|SQL SELECT query. +[source,json] +---- +{ + "fetchers": { "fsf": { "file-system-fetcher": { "basePath": "/data/in" } } }, + "emitters": { "fse": { "file-system-emitter": { "basePath": "/data/out" } } }, + "pipes-iterator": { + "file-system-pipes-iterator": { + "basePath": "/data/in", + "fetcherId": "fsf", + "emitterId": "fse" + } + } +} +---- -|`fetchKeyColumn` -|_required_ -|Column containing the fetch key. +Only one iterator is active per pipeline. To process multiple sources in parallel, run multiple pipelines. -|`idColumn` -|_none_ -|Column containing the document ID. 
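+
+To make the tuple shape from <<contract,the contract above>> concrete, here is a sketch of a single work item as the JSON iterator would read it from a JSONL file (field names follow that plugin's convention; values are placeholders):
+
+[source,json]
+----
+{
+  "id": "doc-0001",
+  "fetchKey": "reports/q3.pdf",
+  "emitKey": "reports/q3.pdf.json"
+}
+----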
+[#plugins]
+== Available Iterators
+
+[cols="2,2,3"]
+|===
+|Plugin |Component name |Notes

-== Solr Iterator (`solr-pipes-iterator`)
-
-Queries a Solr collection and uses each document as a fetch/emit tuple.
-
-**Module:** `tika-pipes-solr`
-
-== JSON Iterator (`json-pipes-iterator`)
-
-Reads an array of objects from a JSON file.
+|xref:pipes/plugins/filesystem.adoc[File System]
+|`file-system-pipes-iterator`
+|Recursively walks a directory tree.

-**Module:** `tika-pipes-json`
+|xref:pipes/plugins/s3.adoc[Amazon S3]
+|`s3-pipes-iterator`
+|Lists S3 objects under a prefix.

-[cols="1,1,3"]
-|===
-|Field |Default |Description
+|xref:pipes/plugins/gcs.adoc[Google Cloud Storage]
+|`gcs-pipes-iterator`
+|Lists GCS objects under a prefix.

-|`jsonPath`
-|_required_
-|Path to the JSON file.
+|xref:pipes/plugins/azblob.adoc[Azure Blob Storage]
+|`az-blob-pipes-iterator`
+|Lists blobs under a prefix.

-|`baseConfig`
-|_required_
-|Fetcher/emitter IDs.
-|===
+|xref:pipes/plugins/solr.adoc[Apache Solr]
+|`solr-pipes-iterator`
+|Queries a Solr collection (useful for re-parsing).

-== Kafka Iterator (`kafka-pipes-iterator`)
+|xref:pipes/plugins/jdbc.adoc[JDBC]
+|`jdbc-pipes-iterator`
+|Walks rows from a SELECT query.

-Consumes messages from a Kafka topic as fetch/emit tuples.
+|xref:pipes/plugins/kafka.adoc[Apache Kafka]
+|`kafka-pipes-iterator`
+|Consumes fetch-request messages from a topic.

-**Module:** `tika-pipes-kafka`
+|xref:pipes/plugins/csv.adoc[CSV]
+|`csv-pipes-iterator`
+|Reads work items from a CSV file.

-[cols="1,1,3"]
+|xref:pipes/plugins/json.adoc[JSON]
+|`json-pipes-iterator`
+|Reads work items from a JSON-lines file.
+|===
-|Field |Default |Description
-
-|`topic`
-|_required_
-|Kafka topic.
-
-|`bootstrapServers`
-|_required_
-|Kafka broker addresses.

-|`groupId`
-|_required_
-|Consumer group ID.
-
-|`autoOffsetReset`
-|`earliest`
-|Where to start reading: `earliest` or `latest`.
-
-|`baseConfig`
-|_required_
-|Fetcher/emitter IDs.
-|===
+For the full plugin / interface matrix, see xref:pipes/plugins/index.adoc[Plugins].
diff --git a/docs/modules/ROOT/pages/pipes/plugins/atlassian-jwt.adoc b/docs/modules/ROOT/pages/pipes/plugins/atlassian-jwt.adoc
new file mode 100644
index 00000000000..30c9b9be446
--- /dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/atlassian-jwt.adoc
@@ -0,0 +1,121 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Atlassian JWT Plugin
+:toc:
+:toclevels: 3
+
+The Atlassian JWT plugin (`tika-pipes-atlassian-jwt`) provides a fetcher specifically for Atlassian Connect endpoints (Jira Cloud, Confluence Cloud) that require an Atlassian-style JWT bearer token. It is fetcher-only — pair it with an emitter and an iterator from other plugins.
+ +For generic HTTP fetching, use the xref:pipes/plugins/http.adoc[HTTP plugin] instead. + +[cols="2,1,3"] +|=== +|Interface |Component name |Class + +|Fetcher +|`atlassian-jwt-fetcher` +|`AtlassianJwtFetcher` +|=== + +[#atlassian-jwt-fetcher] +== Atlassian JWT Fetcher (`atlassian-jwt-fetcher`) + +Fetches resources from an Atlassian Cloud endpoint, generating a fresh JWT for each request signed with the app's shared secret. + +[source,json] +---- +include::example$pipes-atlassian-jwt-fetcher.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`issuer` +|_required_ +|Atlassian Connect app key (the `iss` claim). + +|`sharedSecret` +|_required_ +|Shared secret from the app installation, used as the JWT signing key. + +|`subject` +|_optional_ +|JWT `sub` claim — typically an account ID for user-context calls. + +|`jwtExpiresInSeconds` +|`3600` +|JWT validity window. Each request gets a freshly-signed token. + +|`userAgent` +|_no default_ +|`User-Agent` header sent on each request. + +|`maxConnections` +|`2000` +|HTTP connection-pool size. + +|`maxConnectionsPerRoute` +|`1000` +|Per-route connection-pool size. + +|`connectTimeoutMillis` +|`120000` +|TCP connect timeout. + +|`socketTimeoutMillis` +|`120000` +|Socket read timeout. + +|`requestTimeoutMillis` +|`120000` +|Connection-manager request timeout. + +|`overallTimeoutMillis` +|`120000` +|Hard cap on total time for a single fetch operation. + +|`maxRedirects` +|`0` +|Maximum number of redirects to follow. + +|`maxSpoolSize` +|`-1` +|Maximum bytes to spool locally. `-1` means no limit. + +|`maxErrMsgSize` +|`10000000` +|Maximum bytes of error response body to capture into the exception. + +|`httpHeaders` +|_empty_ +|Extra HTTP headers, formatted as `"Header: value"` strings (list). + +|`httpRequestHeaders` +|_empty_ +|Structured per-request headers as a `Header → [values]` map. +|=== + +[#notes] +== Notes + +* The JWT is computed per request — the `qsh` claim is derived from the request method and URL, as required by Atlassian Connect. +* `sharedSecret` is sensitive; use environment-variable substitution or external secret stores rather than inlining it in source control. +* For Jira Server / Data Center (not Cloud) endpoints, use the xref:pipes/plugins/http.adoc[HTTP plugin] with basic or token auth instead — those endpoints do not accept Atlassian Connect JWTs. diff --git a/docs/modules/ROOT/pages/pipes/plugins/csv.adoc b/docs/modules/ROOT/pages/pipes/plugins/csv.adoc new file mode 100644 index 00000000000..cd44e14d24b --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/csv.adoc @@ -0,0 +1,75 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+= CSV Plugin
+:toc:
+:toclevels: 3
+
+The CSV plugin (`tika-pipes-csv`) provides an iterator that reads work items from a CSV file. It is iterator-only — pair it with a fetcher and emitter.
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Iterator
+|`csv-pipes-iterator`
+|`CSVPipesIterator`
+|===
+
+[#csv-iterator]
+== CSV Iterator (`csv-pipes-iterator`)
+
+Reads each row of the CSV as a work item and emits one `FetchEmitTuple` per row.
+
+[source,json]
+----
+include::example$pipes-csv-iterator.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`csvPath`
+|_required_
+|Path to the CSV file on disk.
+
+|`idColumn`
+|_optional_
+|Column whose value becomes the iterator's row identifier.
+
+|`fetchKeyColumn`
+|_optional_
+|Column whose value becomes the fetch key on each emitted tuple.
+
+|`emitKeyColumn`
+|_optional_
+|Column whose value becomes the emit key on each emitted tuple.
+
+|`fetcherId` / `emitterId`
+|_required_
+|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract.
+|===
+
+[#notes]
+== Notes
+
+* The CSV must have a header row — column names in the config refer to header values, not column indexes.
+* For very large CSV files, the iterator streams rows rather than loading them all into memory.
+* For row-shaped work items in JSONL (one JSON object per line), use the xref:pipes/plugins/json.adoc[JSON iterator] instead.
diff --git a/docs/modules/ROOT/pages/pipes/plugins/elasticsearch.adoc b/docs/modules/ROOT/pages/pipes/plugins/elasticsearch.adoc
new file mode 100644
index 00000000000..110e325bae5
--- /dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/elasticsearch.adoc
@@ -0,0 +1,196 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Elasticsearch Plugin
+:toc:
+:toclevels: 3
+
+The Elasticsearch plugin (`tika-pipes-es`) provides an emitter (writes parsed docs to an Elasticsearch index) and a reporter (writes per-document processing status to Elasticsearch).
+
+It mirrors the xref:pipes/plugins/opensearch.adoc[OpenSearch plugin] in structure. The field names differ — `esUrl` instead of `openSearchUrl` — and ES adds an `apiKey` field for ApiKey-based auth in addition to basic auth.
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Emitter
+|`es-emitter`
+|`ESEmitter`
+
+|Reporter
+|`es-pipes-reporter`
+|`ESPipesReporter`
+|===
+
+[#credentials]
+== Authentication
+
+Two auth modes are supported, in this priority order:
+
+1. **ApiKey** — set the top-level `apiKey` field to the Base64-encoded `id:api_key` string Elasticsearch generates. Sent as `Authorization: ApiKey <base64>`.
+2. 
**Basic** — leave `apiKey` null/empty and set `userName` + `password` inside `httpClientConfig`. Sent as `Authorization: Basic ...`.
+
+The emitter overrides `toString()` to redact the `apiKey` value, so it does not leak into logs.
+
+[#http-client-config]
+== Shared HTTP Client Settings
+
+Both the emitter and the reporter accept a nested `httpClientConfig` block with these fields:
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`userName` / `password`
+|_optional_
+|Basic-auth credentials. Used only when `apiKey` is unset.
+
+|`authScheme`
+|_optional_
+|Set to `basic` to send credentials preemptively.
+
+|`connectionTimeoutMillis`
+|_no default_
+|HTTP connect timeout, in milliseconds.
+
+|`socketTimeoutMillis`
+|_no default_
+|HTTP socket read timeout, in milliseconds.
+
+|`proxyHost` / `proxyPort`
+|_optional_
+|Optional outbound HTTP proxy.
+|===
+
+[#es-emitter]
+== Elasticsearch Emitter (`es-emitter`)
+
+Writes parsed documents to an Elasticsearch index.
+
+[source,json]
+----
+include::example$pipes-elasticsearch-emitter.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`esUrl`
+|_required_
+|Full URL of the target Elasticsearch index, e.g., `https://es.example.com:9200/tika-docs`.
+
+|`idField`
+|_required_
+|Field in the emitted JSON document that holds the Elasticsearch `_id`.
+
+|`attachmentStrategy`
+|_no default_
+|How attached/embedded documents are indexed. One of:
+
+* `SEPARATE_DOCUMENTS` — each attachment becomes its own top-level document.
+* `PARENT_CHILD` — attachments are nested under the parent in a parent/child relation.
+
+|`updateStrategy`
+|_no default_
+|How existing documents are handled. One of:
+
+* `OVERWRITE` — replaces an existing document at `_id`.
+* `UPSERT` — merges into an existing document.
+
+|`commitWithin`
+|_no default_
+|Kept for API parity with the OpenSearch emitter. ES does not consume this value.
+
+|`embeddedFileFieldName`
+|_no default_
+|Name of the field used to hold embedded-file content (used by `PARENT_CHILD`).
+
+|`apiKey`
+|_optional_
+|Base64-encoded `id:api_key`. See <<credentials>>.
+
+|`httpClientConfig`
+|_optional_
+|See <<http-client-config>>.
+|===
+
+[#es-reporter]
+== Elasticsearch Reporter (`es-pipes-reporter`)
+
+Writes per-document processing status records to an Elasticsearch index. Useful for building dashboards over pipeline activity.
+
+[source,json]
+----
+include::example$pipes-elasticsearch-reporter.json[]
+----
+
+`pipes-reporters` accepts multiple reporters keyed by type name — see xref:pipes/reporters.adoc[Pipes Reporters] for how multiple reporters compose.
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`esUrl`
+|_required_
+|Full URL of the status index, e.g., `https://es.example.com:9200/tika-status`.
+
+|`includes`
+|_optional_
+|Set of `RESULT_STATUS` names to include (e.g., `PARSE_SUCCESS`, `PARSE_EXCEPTION`). If unset, all are reported.
+
+|`excludes`
+|_optional_
+|Set of `RESULT_STATUS` names to skip. Applied after `includes`.
+
+|`keyPrefix`
+|_optional_
+|Prefix prepended to status field names in the emitted documents.
+
+|`includeRouting`
+|`false`
+|If `true`, include ES routing info in each status record.
+
+|`apiKey`
+|_optional_
+|Base64-encoded `id:api_key`. See <<credentials>>.
+
+|`httpClientConfig`
+|_optional_
+|See <<http-client-config>>.
+|===
+
+[#es-pipeline]
+== Complete Pipeline Example
+
+The example below combines a filesystem iterator/fetcher with the Elasticsearch emitter and reporter — a common pattern for ingesting a directory of documents into ES.
+
+[source,json]
+----
+include::example$pipes-elasticsearch-pipeline.json[]
+----
+
+[#notes]
+== Notes
+
+* The ES plugin's HTTP client is REST-based; it does not depend on the Elasticsearch transport client.
+* For OpenSearch deployments, use the parallel xref:pipes/plugins/opensearch.adoc[OpenSearch plugin] instead — the field names differ (`openSearchUrl` vs. `esUrl`).
+* Don't check real credentials into source control — the `apiKey` and `password` values in the examples above are placeholders.
diff --git a/docs/modules/ROOT/pages/pipes/plugins/google-drive.adoc b/docs/modules/ROOT/pages/pipes/plugins/google-drive.adoc
new file mode 100644
index 00000000000..d7dba7631ad
--- /dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/google-drive.adoc
@@ -0,0 +1,79 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Google Drive Plugin
+:toc:
+:toclevels: 3
+
+The Google Drive plugin (`tika-pipes-google-drive`) provides a fetcher that retrieves files from Google Drive. It is fetcher-only — pair it with an emitter and an iterator from other plugins.
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Fetcher
+|`google-drive-fetcher`
+|`GoogleDriveFetcher`
+|===
+
+[#google-drive-fetcher]
+== Google Drive Fetcher (`google-drive-fetcher`)
+
+Fetches files from Google Drive by file ID. The fetch key is the Drive file ID.
+
+[source,json]
+----
+include::example$pipes-google-drive-fetcher.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`applicationName`
+|`tika-pipes`
+|Application name sent to the Google API for logging/quota tracking.
+
+|`serviceAccountKeyBase64`
+|_optional_
+|Base64-encoded service-account JSON key. If absent, the SDK falls back to Application Default Credentials (env var `GOOGLE_APPLICATION_CREDENTIALS` or workload identity).
+
+|`subjectUser`
+|_optional_
+|For domain-wide delegation: the user to impersonate (e.g., `user@example.com`).
+
+|`scopes`
+|_empty_
+|OAuth scopes to request. Typical: `["https://www.googleapis.com/auth/drive.readonly"]`.
+
+|`spoolToTemp`
+|`false`
+|If `true`, files are spooled to a temp file before being parsed.
+
+|`throttleSeconds`
+|_optional_
+|Rate-limit array — consecutive failures sleep for the corresponding number of seconds.
+|===
+
+[#notes]
+== Notes
+
+* The plugin uses Google's official `google-api-services-drive` SDK.
+* For domain-wide delegation, the service account must have been granted that scope in the Google Workspace admin console — Tika config alone is not enough.
+* Service-account credentials in `serviceAccountKeyBase64` are sensitive — use environment-variable substitution or external secret stores rather than checking the encoded JSON into source control.
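+
+As a sketch of how `throttleSeconds` composes with the other fields (the wrapper layout follows the xref:pipes/fetchers.adoc[Fetchers] page; values are illustrative):
+
+[source,json]
+----
+{
+  "fetchers": {
+    "gdrive": {
+      "google-drive-fetcher": {
+        "scopes": ["https://www.googleapis.com/auth/drive.readonly"],
+        "spoolToTemp": true,
+        "throttleSeconds": [30, 120, 600]
+      }
+    }
+  }
+}
+----
+
+Read against the table above: the first consecutive failure sleeps 30 seconds, the second 120, the third 600.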
diff --git a/docs/modules/ROOT/pages/pipes/plugins/http.adoc b/docs/modules/ROOT/pages/pipes/plugins/http.adoc
new file mode 100644
index 00000000000..d60a006243d
--- /dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/http.adoc
@@ -0,0 +1,132 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= HTTP Plugin
+:toc:
+:toclevels: 3
+
+The HTTP plugin (`tika-pipes-http`) provides a fetcher that downloads documents over HTTP(S). It is fetcher-only — pair it with an emitter and an iterator from other plugins.
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Fetcher
+|`http-fetcher`
+|`HttpFetcher`
+|===
+
+[#http-fetcher]
+== HTTP Fetcher (`http-fetcher`)
+
+Fetches document bytes from an HTTP(S) URL. The fetch key is the URL.
+
+[source,json]
+----
+include::example$pipes-http-fetcher.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`userName` / `password`
+|_optional_
+|Basic-auth credentials.
+
+|`ntDomain`
+|_optional_
+|NT domain for NTLM auth.
+
+|`authScheme`
+|_optional_
+|Auth scheme hint: `basic`, `digest`, `ntlm`, or unset.
+
+|`proxyHost` / `proxyPort`
+|_optional_
+|Outbound HTTP proxy.
+
+|`userAgent`
+|_no default_
+|`User-Agent` header sent on each request.
+
+|`maxConnections`
+|`2000`
+|HTTP connection-pool size.
+
+|`maxConnectionsPerRoute`
+|`1000`
+|Per-route connection-pool size.
+
+|`connectTimeoutMillis`
+|`120000`
+|TCP connect timeout.
+
+|`socketTimeoutMillis`
+|`120000`
+|Socket read timeout.
+
+|`requestTimeoutMillis`
+|`120000`
+|Connection-manager request timeout.
+
+|`overallTimeoutMillis`
+|`120000`
+|Hard cap on total time for a single fetch operation.
+
+|`maxRedirects`
+|`0`
+|Maximum number of redirects to follow. `0` means follow none.
+
+|`maxSpoolSize`
+|`-1`
+|Maximum bytes to spool locally before failing. `-1` means no limit.
+
+|`maxErrMsgSize`
+|`10000000`
+|Maximum bytes of error response body to capture into the exception.
+
+|`httpHeaders`
+|_empty_
+|Extra HTTP headers, formatted as `"Header: value"` strings (list).
+
+|`httpRequestHeaders`
+|_empty_
+|Structured per-request headers as a `Header → [values]` map. Used when a header has multiple values.
+
+|`jwtIssuer` / `jwtSubject` / `jwtExpiresInSeconds`
+|_optional_
+|JWT claims, for endpoints that accept JWT-bearer auth.
+
+|`jwtSecret`
+|_optional_
+|HMAC secret for symmetric-key JWT signing.
+
+|`jwtPrivateKeyBase64`
+|_optional_
+|Base64-encoded private key for asymmetric (RSA/ECDSA) JWT signing. Mutually exclusive with `jwtSecret`.
+|===
+
+[#notes]
+== Notes
+
+* Both basic auth and JWT auth may be configured at the same time, but only one will apply per request (JWT takes precedence when present).
+* For zero-redirect crawling, leave `maxRedirects` at `0`. 
The fetcher returns the redirect response as-is so the caller can decide what to do. +* `overallTimeoutMillis` is enforced by the fetcher itself, not the HTTP client — it covers slow drains and zombie connections that the lower-level timeouts may miss. +* For Atlassian Cloud endpoints that require an Atlassian Connect JWT, use the dedicated xref:pipes/plugins/atlassian-jwt.adoc[Atlassian JWT fetcher] instead — it has the correct claim layout baked in. diff --git a/docs/modules/ROOT/pages/pipes/plugins/jdbc.adoc b/docs/modules/ROOT/pages/pipes/plugins/jdbc.adoc new file mode 100644 index 00000000000..c1c8747b5f3 --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/jdbc.adoc @@ -0,0 +1,241 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += JDBC Plugin +:toc: +:toclevels: 3 + +The JDBC plugin (`tika-pipes-jdbc`) provides emitter, iterator, and reporter interfaces for relational databases. The plugin is JDBC-driver-agnostic: any database with a working JDBC driver on the plugin's classpath should work. + +[cols="2,1,3"] +|=== +|Interface |Component name |Class + +|Emitter +|`jdbc-emitter` +|`JDBCEmitter` + +|Iterator +|`jdbc-pipes-iterator` +|`JDBCPipesIterator` + +|Reporter +|`jdbc-reporter` +|`JDBCPipesReporter` +|=== + +[#drivers] +== JDBC Drivers + +The plugin does not bundle drivers. Drop the JDBC driver JAR for your database into the plugin's `lib/` directory alongside `tika-pipes-jdbc.jar` so the plugin class loader can find it. Tested drivers include H2, PostgreSQL, MySQL, SQLite, and SQL Server. + +[#jdbc-emitter] +== JDBC Emitter (`jdbc-emitter`) + +Writes parsed documents into a relational table. The emitter uses a prepared statement built from the `insert` template; the emit key is always the first bound parameter, followed by one parameter per entry in `keys`. + +[source,json] +---- +include::example$pipes-jdbc-emitter.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`connection` +|_required_ +|JDBC connection URL (validated non-blank). Example: `jdbc:postgresql://db.example.com:5432/tika`. + +|`insert` +|_required_ +|Prepared-statement `INSERT` template. Must use `?` placeholders. The first placeholder receives the emit key; subsequent placeholders receive values from `keys` in order. + +|`createTable` +|_optional_ +|DDL executed once at startup. Use this to create the destination table if it does not already exist. + +|`alterTable` +|_optional_ +|DDL executed once at startup, after `createTable`. Use for indexes or migrations. + +|`postConnection` +|_optional_ +|SQL executed every time a new connection is opened (e.g., pragma statements for SQLite). + +|`maxRetries` +|`0` +|Number of times to retry a failed insert before giving up. 
+
+|`maxStringLength`
+|`64000`
+|String values longer than this are truncated. Set to `-1` to disable.
+
+|`keys`
+|_required_
+|Ordered map of metadata-field-name → SQL-type. Types: `string`, `int`, `long`, `bigint`, `boolean`, `timestamp`. The order matters — it must match the order of `?` placeholders in `insert`.
+
+|`attachmentStrategy`
+|`FIRST_ONLY`
+|How embedded documents are written. One of:
+
+* `FIRST_ONLY` — only the parent document is inserted; attachments are dropped.
+* `ALL` — every document (parent and attachments) gets its own row.
+
+|`multivaluedFieldStrategy`
+|`CONCATENATE`
+|How multi-valued metadata fields are handled. One of:
+
+* `FIRST_ONLY` — keep only the first value.
+* `CONCATENATE` — join values with `multivaluedFieldDelimiter`.
+
+|`multivaluedFieldDelimiter`
+|`", "`
+|Separator used by `CONCATENATE`.
+|===
+
+[#jdbc-iterator]
+== JDBC Iterator (`jdbc-pipes-iterator`)
+
+Walks rows returned by a SELECT statement, emitting one `FetchEmitTuple` per row.
+
+[source,json]
+----
+include::example$pipes-jdbc-iterator.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`connection`
+|_required_
+|JDBC connection URL.
+
+|`select`
+|_required_
+|SELECT statement to enumerate.
+
+|`idColumn`
+|_optional_
+|Column whose value becomes the iterator's row identifier.
+
+|`fetchKeyColumn`
+|_optional_
+|Column whose value becomes the fetch key on each emitted tuple.
+
+|`emitKeyColumn`
+|_optional_
+|Column whose value becomes the emit key on each emitted tuple.
+
+|`fetchKeyRangeStartColumn` / `fetchKeyRangeEndColumn`
+|_optional_
+|Columns for range-based fetch keys (advanced).
+
+|`fetchSize`
+|`-1`
+|JDBC `fetchSize` hint. `-1` lets the driver choose.
+
+|`queryTimeoutSeconds`
+|`-1`
+|JDBC statement timeout. `-1` means no timeout.
+
+|`fetcherId` / `emitterId`
+|_required_
+|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract.
+|===
+
+[#jdbc-reporter]
+== JDBC Reporter (`jdbc-reporter`)
+
+Writes per-document processing status to a SQL table. Records are buffered in memory and flushed periodically.
+
+[source,json]
+----
+include::example$pipes-jdbc-reporter.json[]
+----
+
+`pipes-reporters` accepts multiple reporters keyed by type name — see xref:pipes/reporters.adoc[Pipes Reporters] for how multiple reporters compose.
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`connectionString`
+|_required_
+|JDBC connection URL for the status database.
+
+|`includes`
+|_empty (all reported)_
+|Set of `RESULT_STATUS` names to include (e.g., `PARSE_SUCCESS`, `PARSE_EXCEPTION`).
+
+|`excludes`
+|_empty_
+|Set of `RESULT_STATUS` names to skip. Applied after `includes`.
+
+|`tableName`
+|`tika_status`
+|Status table name.
+
+|`createTable`
+|`true`
+|If `true`, drop the existing status table (if any) and recreate it on startup. Set to `false` to preserve an existing table.
+
+|`reportSql`
+|_no default_
+|Custom prepared-statement template for inserting/updating status rows. If unset, the reporter uses `insert into <tableName> (id, status, timestamp) values (?,?,?)`. Coordinate with `reportVariables` when overriding.
+
+|`postConnectionSql`
+|_no default_
+|SQL executed each time a connection is opened (e.g., SQLite pragmas).
+
+|`reportVariables`
+|_empty_
+|Names of the variables to bind to each `?` placeholder in `reportSql`, in order. Available names: `id`, `status`, `timestamp`. Only needed when overriding `reportSql`.
+ +|`reportWithinMs` +|`10000` +|Milliseconds between batched flushes from the in-memory cache to the database. + +|`cacheSize` +|`100` +|Maximum in-memory cache size before a flush is forced. +|=== + +[#jdbc-pipeline] +== Complete Pipeline Example + +The example below combines a JDBC iterator (reading work items from one table), a filesystem fetcher (reading the actual document bytes), a JDBC emitter (writing parsed metadata to a results table), and a JDBC reporter (recording per-document outcomes). + +[source,json] +---- +include::example$pipes-jdbc-pipeline.json[] +---- + +[#notes] +== Notes + +* H2 (`jdbc:h2:mem:...`) is convenient for testing — no setup required — but the schema is lost when the process exits. +* The emitter's `keys` map preserves insertion order (it's a `LinkedHashMap` in Java). When writing the JSON, list the keys in the same order as the `?` placeholders in `insert`. +* For high-throughput inserts, point `maxRetries` at a small positive number so transient connection failures don't drop documents. +* Bind variables are typed by the SQL type declared in `keys`, not by the metadata value's Java type. Mismatches between SQL type and column type cause inserts to fail — coordinate `createTable` with `keys`. diff --git a/docs/modules/ROOT/pages/pipes/plugins/json.adoc b/docs/modules/ROOT/pages/pipes/plugins/json.adoc new file mode 100644 index 00000000000..18ff976609a --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/json.adoc @@ -0,0 +1,63 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += JSON Plugin +:toc: +:toclevels: 3 + +The JSON plugin (`tika-pipes-json`) provides an iterator that reads work items from a JSON-lines file (one JSON object per line). It is iterator-only — pair it with a fetcher and emitter. + +[cols="2,1,3"] +|=== +|Interface |Component name |Class + +|Iterator +|`json-pipes-iterator` +|`JsonPipesIterator` +|=== + +[#json-iterator] +== JSON Iterator (`json-pipes-iterator`) + +Reads each line of a JSONL file as a work item and emits one `FetchEmitTuple` per object. + +[source,json] +---- +include::example$pipes-json-iterator.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`jsonPath` +|_required_ +|Path to the JSONL file on disk. + +|`fetcherId` / `emitterId` +|_required_ +|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract. +|=== + +[#notes] +== Notes + +* The file format is JSON-lines (also called NDJSON) — one valid JSON object per line, no surrounding array brackets. +* Each line's JSON object is parsed and its fields become the metadata of the emitted `FetchEmitTuple`. 
The keys used for fetch / emit identification come from the tuple-shaped fields (e.g., `fetchKey`, `emitKey`). +* For columnar work items in a CSV, use the xref:pipes/plugins/csv.adoc[CSV iterator] instead. diff --git a/docs/modules/ROOT/pages/pipes/plugins/kafka.adoc b/docs/modules/ROOT/pages/pipes/plugins/kafka.adoc new file mode 100644 index 00000000000..f1da41fbe83 --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/kafka.adoc @@ -0,0 +1,213 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += Apache Kafka Plugin +:toc: +:toclevels: 3 + +The Apache Kafka plugin (`tika-pipes-kafka`) provides an emitter (publishes parsed documents to a Kafka topic) and an iterator (consumes fetch requests from a Kafka topic). + +[cols="2,1,3"] +|=== +|Interface |Component name |Class + +|Emitter +|`kafka-emitter` +|`KafkaEmitter` + +|Iterator +|`kafka-pipes-iterator` +|`KafkaPipesIterator` +|=== + +[#kafka-emitter] +== Kafka Emitter (`kafka-emitter`) + +Publishes each parsed document as a record to a Kafka topic. + +[source,json] +---- +include::example$pipes-kafka-emitter.json[] +---- + +=== Configuration + +Most fields map directly to standard Kafka producer settings; the defaults listed here match Kafka's own defaults unless noted. + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`topic` +|_required_ +|Kafka topic to publish to (validated non-blank). + +|`bootstrapServers` +|_required_ +|Comma-separated `host:port` list of Kafka brokers (validated non-blank). + +|`acks` +|`all` +|Producer acks setting: `0`, `1`, or `all`. + +|`lingerMs` +|`5000` +|Producer linger in milliseconds. + +|`batchSize` +|`16384` +|Producer batch size in bytes. + +|`bufferMemory` +|`33554432` +|Producer buffer memory in bytes (32 MiB). + +|`compressionType` +|`none` +|One of `none`, `gzip`, `snappy`, `lz4`, `zstd`. + +|`connectionsMaxIdleMs` +|`540000` +|Producer connection idle timeout. + +|`deliveryTimeoutMs` +|`120000` +|End-to-end delivery timeout. + +|`enableIdempotence` +|`false` +|Enable the idempotent producer. Requires `acks=all` and `maxInFlightRequestsPerConnection<=5`. + +|`interceptorClasses` +|_no default_ +|Comma-separated list of producer interceptor class names. + +|`maxBlockMs` +|`60000` +|How long the producer blocks on `send()` when the buffer is full. + +|`maxInFlightRequestsPerConnection` +|`5` +|In-flight requests per connection. + +|`maxRequestSize` +|`1048576` +|Maximum request size in bytes (1 MiB). + +|`metadataMaxAgeMs` +|`300000` +|Metadata refresh interval. + +|`requestTimeoutMs` +|`30000` +|Request timeout. + +|`retries` +|`2147483647` +|Producer retries. Default is `Integer.MAX_VALUE`; capped by `deliveryTimeoutMs`. + +|`retryBackoffMs` +|`100` +|Backoff between retries. 
+ +|`transactionTimeoutMs` +|`60000` +|Transaction timeout (only meaningful with `transactionalId`). + +|`transactionalId` +|_no default_ +|Set to enable transactional producer. + +|`clientId` +|_no default_ +|`client.id` to send with each request. + +|`keySerializer` / `valueSerializer` +|_no default_ +|Fully-qualified serializer class names. Leave unset to use the plugin's defaults (string keys, JSON values). +|=== + +[#kafka-iterator] +== Kafka Iterator (`kafka-pipes-iterator`) + +Consumes fetch-request messages from a Kafka topic and emits one `FetchEmitTuple` per message. Useful for building event-driven pipelines where some upstream system pushes work to a queue. + +[source,json] +---- +include::example$pipes-kafka-iterator.json[] +---- + +=== Configuration + +[cols="1,1,3"] +|=== +|Field |Default |Description + +|`topic` +|_required_ +|Kafka topic to consume from. + +|`bootstrapServers` +|_required_ +|Broker list. + +|`groupId` +|_optional_ +|Kafka consumer group ID. Strongly recommended in production for failover and partition reassignment. + +|`keySerializer` / `valueSerializer` +|_optional_ +|Custom (de)serializer class names. + +|`autoOffsetReset` +|`earliest` +|What to do on first connect: `earliest` or `latest`. + +|`pollDelayMs` +|`100` +|Sleep between `poll()` calls when the topic is idle. + +|`emitMax` +|`-1` +|Maximum tuples to emit. `-1` means unbounded. + +|`groupInitialRebalanceDelayMs` +|`3000` +|Initial rebalance delay for the consumer group. + +|`fetcherId` / `emitterId` +|_required_ +|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract. +|=== + +[#kafka-pipeline] +== Complete Pipeline Example + +The example below wires the Kafka iterator (consuming fetch requests) with a filesystem fetcher and a Kafka emitter (publishing parsed results). Common for stream-processing-style document pipelines. + +[source,json] +---- +include::example$pipes-kafka-pipeline.json[] +---- + +[#notes] +== Notes + +* The Kafka plugin uses the official `kafka-clients` SDK. +* The emitter is fire-and-forget at the Tika level; durability is determined by Kafka's `acks` and broker replication factor, not by Tika. +* For exactly-once semantics, set `enableIdempotence: true` (and ensure `acks: all`); for transactional semantics, also set `transactionalId`. +* The iterator's `groupId` controls partition assignment. Set it explicitly in production — without one, the consumer receives a transient assignment that resets on restart. diff --git a/docs/modules/ROOT/pages/pipes/plugins/microsoft-graph.adoc b/docs/modules/ROOT/pages/pipes/plugins/microsoft-graph.adoc new file mode 100644 index 00000000000..ede02d8f622 --- /dev/null +++ b/docs/modules/ROOT/pages/pipes/plugins/microsoft-graph.adoc @@ -0,0 +1,85 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Microsoft Graph Plugin
+:toc:
+:toclevels: 3
+
+The Microsoft Graph plugin (`tika-pipes-microsoft-graph`) provides a fetcher that retrieves files from OneDrive, SharePoint, and other Graph-accessible sources. It is fetcher-only — pair it with an emitter and an iterator from other plugins.
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Fetcher
+|`microsoft-graph-fetcher`
+|`MicrosoftGraphFetcher`
+|===
+
+[#credentials]
+== Credentials
+
+The fetcher authenticates against Microsoft Entra (Azure AD) using one of two credential modes — set exactly one:
+
+* **Client secret** (`clientSecretCredentialsConfig`) — easiest to set up; client secrets rotate manually.
+* **Client certificate** (`clientCertificateCredentialsConfig`) — for environments that require certificate-based auth.
+
+Both modes need the same two identity fields, `tenantId` and `clientId`, plus either `clientSecret` or `certificate`.
+
+[#microsoft-graph-fetcher]
+== Microsoft Graph Fetcher (`microsoft-graph-fetcher`)
+
+Fetches files via the Microsoft Graph API. The fetch key encodes the Graph object identifier.
+
+[source,json]
+----
+include::example$pipes-microsoft-graph-fetcher.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`clientSecretCredentialsConfig`
+|_required (XOR)_
+|Nested object with `tenantId`, `clientId`, `clientSecret`. See <<credentials>>.
+
+|`clientCertificateCredentialsConfig`
+|_required (XOR)_
+|Nested object with `tenantId`, `clientId`, `certificate`. See <<credentials>>.
+
+|`scopes`
+|_empty_
+|OAuth scopes to request. Typical: `["https://graph.microsoft.com/.default"]` (application permissions).
+
+|`spoolToTemp`
+|`false`
+|If `true`, files are spooled to a temp file before being parsed.
+
+|`throttleSeconds`
+|_optional_
+|Rate-limit array — consecutive failures sleep for the corresponding number of seconds.
+|===
+
+[#notes]
+== Notes
+
+* The plugin uses the official `microsoft-graph` SDK.
+* For most service-to-service workflows, use application permissions (`https://graph.microsoft.com/.default` scope) — delegated permissions require an interactive flow that the fetcher does not support.
+* Client secrets are sensitive — use environment-variable substitution or external secret stores rather than inlining them in source control.
diff --git a/docs/modules/ROOT/pages/pipes/plugins/opensearch.adoc b/docs/modules/ROOT/pages/pipes/plugins/opensearch.adoc
new file mode 100644
index 00000000000..34931251198
--- /dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/opensearch.adoc
@@ -0,0 +1,176 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= OpenSearch Plugin
+:toc:
+:toclevels: 3
+
+The OpenSearch plugin (`tika-pipes-opensearch`) provides an emitter (writes parsed docs to an OpenSearch index) and a reporter (writes per-document processing status to OpenSearch).
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Emitter
+|`opensearch-emitter`
+|`OpenSearchEmitter`
+
+|Reporter
+|`opensearch-pipes-reporter`
+|`OpenSearchPipesReporter`
+|===
+
+[#http-client-config]
+== Shared HTTP Client Settings
+
+Both the emitter and the reporter accept a nested `httpClientConfig` block with these fields:
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`userName` / `password`
+|_optional_
+|Basic-auth credentials. Omit both for an anonymous client.
+
+|`authScheme`
+|_optional_
+|Set to `basic` to send credentials preemptively.
+
+|`connectionTimeoutMillis`
+|_no default_
+|HTTP connect timeout, in milliseconds.
+
+|`socketTimeoutMillis`
+|_no default_
+|HTTP socket read timeout, in milliseconds.
+
+|`proxyHost` / `proxyPort`
+|_optional_
+|Optional outbound HTTP proxy.
+|===
+
+[#opensearch-emitter]
+== OpenSearch Emitter (`opensearch-emitter`)
+
+Writes parsed documents to an OpenSearch index.
+
+[source,json]
+----
+include::example$pipes-opensearch-emitter.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`openSearchUrl`
+|_required_
+|Full URL of the target OpenSearch index, e.g., `https://opensearch.example.com:9200/tika-docs`.
+
+|`idField`
+|_required_
+|Field in the emitted JSON document that holds the OpenSearch `_id`.
+
+|`attachmentStrategy`
+|_no default_
+|How attached/embedded documents are indexed. One of:
+
+* `SEPARATE_DOCUMENTS` — each attachment becomes its own top-level document.
+* `PARENT_CHILD` — attachments are nested under the parent in a parent/child relation.
+
+|`updateStrategy`
+|_no default_
+|How existing documents are handled. One of:
+
+* `OVERWRITE` — replaces an existing document at `_id`.
+* `UPSERT` — merges into an existing document.
+
+|`commitWithin`
+|_no default_
+|Maximum delay before the index refresh becomes visible, in milliseconds (passed to OpenSearch's `refresh` semantics).
+
+|`embeddedFileFieldName`
+|_no default_
+|Name of the field used to hold embedded-file content (used by `PARENT_CHILD`).
+
+|`httpClientConfig`
+|_optional_
+|See <<http-client-config>>.
+|===
+
+[#opensearch-reporter]
+== OpenSearch Reporter (`opensearch-pipes-reporter`)
+
+Writes per-document processing status records to an OpenSearch index. Useful for building dashboards over pipeline activity.
+
+[source,json]
+----
+include::example$pipes-opensearch-reporter.json[]
+----
+
+`pipes-reporters` accepts multiple reporters keyed by type name — see xref:pipes/reporters.adoc[Pipes Reporters] for how multiple reporters compose.
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`openSearchUrl`
+|_required_
+|Full URL of the status index, e.g., `https://opensearch.example.com:9200/tika-status`.
+
+|`includes`
+|_optional_
+|Set of `RESULT_STATUS` names to include (e.g., `PARSE_SUCCESS`, `PARSE_EXCEPTION`). If unset, all are reported.
+
+|`excludes`
+|_optional_
+|Set of `RESULT_STATUS` names to skip. Applied after `includes`.
+
+|`keyPrefix`
+|_optional_
+|Prefix prepended to status field names in the emitted documents.
+
+|`includeRouting`
+|`false`
+|If `true`, include OpenSearch routing info in each status record.
+
+|`httpClientConfig`
+|_optional_
+|See <<http-client-config>>.
+
+[#opensearch-reporter]
+== OpenSearch Reporter (`opensearch-pipes-reporter`)
+
+Writes per-document processing status records to an OpenSearch index. Useful for building dashboards over pipeline activity.
+
+[source,json]
+----
+include::example$pipes-opensearch-reporter.json[]
+----
+
+`pipes-reporters` accepts multiple reporters keyed by type name — see xref:pipes/reporters.adoc[Pipes Reporters] for how multiple reporters compose.
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`openSearchUrl`
+|_required_
+|Full URL of the status index, e.g., `https://opensearch.example.com:9200/tika-status`.
+
+|`includes`
+|_optional_
+|Set of `RESULT_STATUS` names to include (e.g., `PARSE_SUCCESS`, `PARSE_EXCEPTION`). If unset, all are reported.
+
+|`excludes`
+|_optional_
+|Set of `RESULT_STATUS` names to skip. Applied after `includes`.
+
+|`keyPrefix`
+|_optional_
+|Prefix prepended to status field names in the emitted documents.
+
+|`includeRouting`
+|`false`
+|If `true`, include OpenSearch routing info in each status record.
+
+|`httpClientConfig`
+|_optional_
+|See <<http-client-config>>.
+|===
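+
+=== Example: status filtering
+
+A minimal sketch of a reporter that records only successes and terminal failures. The
+URL is a placeholder; the status names are the `RESULT_STATUS` values used elsewhere in
+this documentation.
+
+[source,json]
+----
+{
+  "pipes-reporters": {
+    "opensearch-pipes-reporter": {
+      "openSearchUrl": "https://opensearch.example.com:9200/tika-status",
+      "includes": ["PARSE_SUCCESS", "PARSE_EXCEPTION", "OOM", "TIMEOUT"],
+      "keyPrefix": "tika_",
+      "includeRouting": false
+    }
+  }
+}
+----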
+
+[#opensearch-pipeline]
+== Complete Pipeline Example
+
+The example below combines a filesystem iterator/fetcher with the OpenSearch emitter and reporter — a common pattern for ingesting a directory of documents into an OpenSearch index.
+
+[source,json]
+----
+include::example$pipes-opensearch-pipeline.json[]
+----
+
+[#notes]
+== Notes
+
+* The OpenSearch plugin's HTTP client is REST-based; it does not depend on the OpenSearch transport client.
+* For Elasticsearch deployments, use the parallel xref:pipes/plugins/elasticsearch.adoc[Elasticsearch plugin] instead — the field names differ (`esUrl` vs. `openSearchUrl`) and ES adds API-key auth.
+* Don't check real credentials into source control — the `password` values in the examples above are placeholders.
diff --git a/docs/modules/ROOT/pages/pipes/plugins/solr.adoc b/docs/modules/ROOT/pages/pipes/plugins/solr.adoc
new file mode 100644
index 00000000000..808e2bcdb63
--- /dev/null
+++ b/docs/modules/ROOT/pages/pipes/plugins/solr.adoc
@@ -0,0 +1,202 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+= Apache Solr Plugin
+:toc:
+:toclevels: 3
+
+The Apache Solr plugin (`tika-pipes-solr`) provides an emitter (writes parsed docs to a Solr collection) and an iterator (enumerates documents already in Solr for re-processing).
+
+[cols="2,1,3"]
+|===
+|Interface |Component name |Class
+
+|Emitter
+|`solr-emitter`
+|`SolrEmitter`
+
+|Iterator
+|`solr-pipes-iterator`
+|`SolrPipesIterator`
+|===
+
+[#connection-modes]
+== Connection Modes
+
+Both components support two ways of locating a Solr cluster — pick exactly one:
+
+* **Direct URLs (`solrUrls`)** — list one or more node URLs. Use this for standalone Solr or for SolrCloud when you want to bypass ZooKeeper for routing.
+* **ZooKeeper (`solrZkHosts` + `solrZkChroot`)** — list the ZooKeeper ensemble; Solr discovers nodes via ZK. Use this for SolrCloud deployments.
+
+The emitter's `validate()` enforces the XOR: setting neither or both throws a `TikaConfigException`.
+
+[#solr-emitter]
+== Solr Emitter (`solr-emitter`)
+
+Writes parsed documents to a Solr collection.
+
+[source,json]
+----
+include::example$pipes-solr-emitter.json[]
+----
+
+For SolrCloud with ZooKeeper-based routing, use `solrZkHosts` (and optionally `solrZkChroot`) instead of `solrUrls`:
+
+[source,json]
+----
+include::example$pipes-solr-emitter-zk.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`solrCollection`
+|_required_
+|Solr collection (or core) name (validated non-blank).
+
+|`solrUrls`
+|_required (XOR)_
+|List of node URLs, e.g., `["http://solr1.example.com:8983/solr"]`. Mutually exclusive with `solrZkHosts`.
+
+|`solrZkHosts`
+|_required (XOR)_
+|List of ZooKeeper hosts, e.g., `["zk1.example.com:2181"]`. Mutually exclusive with `solrUrls`.
+
+|`solrZkChroot`
+|_optional_
+|ZooKeeper chroot, when using `solrZkHosts`.
+
+|`idField`
+|`id`
+|Field in the emitted JSON document that holds the Solr document id (the collection's unique key).
+
+|`commitWithin`
+|`1000`
+|Solr `commitWithin` value, in milliseconds.
+
+|`connectionTimeoutMillis`
+|`10000`
+|HTTP connect timeout.
+
+|`socketTimeoutMillis`
+|`60000`
+|HTTP socket read timeout.
+
+|`attachmentStrategy`
+|`PARENT_CHILD`
+|How attached/embedded documents are indexed. One of:
+
+* `SEPARATE_DOCUMENTS` — each attachment becomes its own top-level document.
+* `PARENT_CHILD` — attachments are nested under the parent.
+
+|`updateStrategy`
+|`ADD`
+|How existing documents are handled. One of:
+
+* `ADD` — replaces any existing document with the same id.
+* `UPDATE_MUST_EXIST` — fails if no document exists with that id.
+* `UPDATE_MUST_NOT_EXIST` — fails if a document already exists with that id.
+
+|`embeddedFileFieldName`
+|`embedded`
+|Field name used to hold embedded-file content (used by `PARENT_CHILD`).
+
+|`userName` / `password` / `authScheme`
+|_optional_
+|HTTP basic auth credentials.
+
+|`proxyHost` / `proxyPort`
+|_optional_
+|Optional outbound HTTP proxy.
+|===
+
+[#solr-iterator]
+== Solr Iterator (`solr-pipes-iterator`)
+
+Enumerates documents already in a Solr collection and emits one `FetchEmitTuple` per matching document. Useful for re-parsing existing documents — e.g., after a parser bug fix or a Tika upgrade.
+
+[source,json]
+----
+include::example$pipes-solr-iterator.json[]
+----
+
+=== Configuration
+
+[cols="1,1,3"]
+|===
+|Field |Default |Description
+
+|`solrCollection`
+|_required_
+|Solr collection to iterate.
+
+|`solrUrls` / `solrZkHosts` / `solrZkChroot`
+|_required (XOR)_
+|Connection mode — see <<connection-modes>>.
+
+|`filters`
+|_empty_
+|List of Solr filter queries to scope the iteration (e.g., `["status:NEEDS_REPARSE"]`).
+
+|`idField`
+|_no default_
+|Solr field used as the iterator's row identifier.
+
+|`parsingIdField` / `failCountField` / `sizeFieldName` / `additionalFields`
+|_optional_
+|Extra Solr fields surfaced into the `FetchEmitTuple` metadata. Advanced; usually unset.
+
+|`rows`
+|`5000`
+|Page size for the underlying Solr query.
+
+|`connectionTimeoutMillis`
+|`10000`
+|HTTP connect timeout.
+
+|`socketTimeoutMillis`
+|`60000`
+|HTTP socket read timeout.
+
+|`userName` / `password` / `authScheme` / `proxyHost` / `proxyPort`
+|_optional_
+|Same as the emitter.
+
+|`fetcherId` / `emitterId`
+|_required_
+|IDs of the fetcher and emitter to bind to each emitted tuple. See xref:pipes/iterators.adoc[Pipes Iterators] for the shared iterator contract.
+|===
+
+[#solr-pipeline]
+== Complete Pipeline Example
+
+The example below combines a filesystem iterator/fetcher with the Solr emitter — the common pattern for ingesting a directory of documents into Solr.
+
+[source,json]
+----
+include::example$pipes-solr-pipeline.json[]
+----
+
+[#notes]
+== Notes
+
+* The Solr plugin uses SolrJ (`solr-solrj`). HTTP/2 transport is used when available.
+* For re-parsing workflows, point a `solr-pipes-iterator` at the same collection a `solr-emitter` writes to, but use `UPDATE_MUST_EXIST` on the emitter to avoid creating phantom rows; a sketch follows in <<reparse-example>>.
+* `commitWithin` is a soft guarantee — Solr may delay commits under load. If you need firmer control over commit timing, configure auto-commit on the Solr side and leave `commitWithin` at its default.
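+
+[#reparse-example]
+== Re-parse Workflow Sketch
+
+The sketch below illustrates the pairing recommended in the notes: the iterator selects
+flagged documents from the collection and the emitter writes back with
+`UPDATE_MUST_EXIST`, so nothing new is created. It is illustrative only: the collection
+name, URLs, filter query, and component ids (`se`, plus an externally defined fetcher
+`fsf` that supplies the original bytes) are placeholders.
+
+[source,json]
+----
+{
+  "pipes-iterator": {
+    "solr-pipes-iterator": {
+      "solrCollection": "tika-docs",
+      "solrUrls": ["http://solr1.example.com:8983/solr"],
+      "filters": ["status:NEEDS_REPARSE"],
+      "idField": "id",
+      "fetcherId": "fsf",
+      "emitterId": "se"
+    }
+  },
+  "emitters": {
+    "se": {
+      "solr-emitter": {
+        "solrCollection": "tika-docs",
+        "solrUrls": ["http://solr1.example.com:8983/solr"],
+        "updateStrategy": "UPDATE_MUST_EXIST"
+      }
+    }
+  }
+}
+----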
diff --git a/docs/modules/ROOT/pages/pipes/reporters.adoc b/docs/modules/ROOT/pages/pipes/reporters.adoc index 3994ede95e2..01bc05e6043 100644 --- a/docs/modules/ROOT/pages/pipes/reporters.adoc +++ b/docs/modules/ROOT/pages/pipes/reporters.adoc @@ -16,78 +16,59 @@ // = Pipes Reporters +:toc: -Reporters track the processing status of each document in the pipeline. -They record whether a parse succeeded, failed, or timed out, along with -timing information. +A *pipes reporter* records per-document processing status — success, parse exception, timeout, OOM — as the pipeline runs. Reporters are observational; they do not gate parsing or emission. -== File System Reporter (`file-system-reporter`) +[#contract] +== The Reporter Contract -Writes a JSON status file that is updated periodically. +Each reporter implements `PipesReporter#report(FetchEmitTuple t, PipesResult result, long elapsed)` and gets called once per processed document. Reporters typically buffer status records in memory and flush them on a background thread, so per-document calls stay cheap. -**Module:** `tika-pipes-file-system` +[#wiring] +== Wiring Reporters Into a Pipeline -[cols="1,1,3"] -|=== -|Field |Default |Description - -|`statusFile` -|_required_ -|Path to the JSON status file. - -|`reportUpdateMs` -|`1000` -|How often to update the status file (milliseconds). -|=== - -== JDBC Reporter (`jdbc-reporter`) - -Writes per-document status to a SQL database table. - -**Module:** `tika-pipes-jdbc` +Reporters live under the plural top-level `pipes-reporters` key. The keys inside that block are reporter type-names; multiple reporters may run together. -[cols="1,1,3"] -|=== -|Field |Default |Description +[source,json] +---- +{ + "pipes-reporters": { + "file-system-reporter": { + "statusFile": "/var/log/tika/status.json", + "reportUpdateMs": 1000 + }, + "jdbc-reporter": { + "connectionString": "jdbc:h2:mem:reports;DB_CLOSE_DELAY=-1" + } + } +} +---- -|`connectionString` -|_required_ -|JDBC connection string. +Each entry's outer key is the reporter's component name — there is no separate ID layer because reporters do not get referenced by other components. -|`tableName` -|_required_ -|Table name for status records. +[#plugins] +== Available Reporters -|`createTable` -|`false` -|Auto-create the table if it does not exist. +[cols="2,2,3"] |=== +|Plugin |Component name |Notes -== Elasticsearch Reporter (`es-pipes-reporter`) +|xref:pipes/plugins/filesystem.adoc[File System] +|`file-system-reporter` +|Writes a JSON status file periodically. Pair with an external watcher — see xref:pipes/plugins/filesystem.adoc#watching[Live status for watching applications]. -Writes per-document parse status back into the Elasticsearch index via upsert. +|xref:pipes/plugins/jdbc.adoc[JDBC] +|`jdbc-reporter` +|Writes per-doc status rows to a SQL table. -**Module:** `tika-pipes-es` +|xref:pipes/plugins/opensearch.adoc[OpenSearch] +|`opensearch-pipes-reporter` +|Writes per-doc status records to an OpenSearch index. -[cols="1,1,3"] +|xref:pipes/plugins/elasticsearch.adoc[Elasticsearch] +|`es-pipes-reporter` +|Writes per-doc status records to an Elasticsearch index. |=== -|Field |Default |Description - -|`esUrl` -|_required_ -|Elasticsearch endpoint (including index). - -|`keyPrefix` -|`tika_` -|Prefix for status fields (e.g., `tika_parse_status`). - -|`includeRouting` -|`false` -|Include routing in upsert requests. -|=== - -== OpenSearch Reporter (`opensearch-pipes-reporter`) - -Same as the ES reporter but for OpenSearch. 
Uses `openSearchUrl` instead of `esUrl`. -**Module:** `tika-pipes-opensearch` +For the full plugin / interface matrix, see xref:pipes/plugins/index.adoc[Plugins]. diff --git a/docs/modules/ROOT/pages/pipes/shared-server-mode.adoc b/docs/modules/ROOT/pages/pipes/shared-server-mode.adoc index d956acb5fac..e95180ecc72 100644 --- a/docs/modules/ROOT/pages/pipes/shared-server-mode.adoc +++ b/docs/modules/ROOT/pages/pipes/shared-server-mode.adoc @@ -101,6 +101,8 @@ When using shared mode, size the JVM heap for worst-case concurrent load: In per-client mode, the same workload would use 4 x 500MB = 2GB total, but distributed across 4 isolated JVMs where one OOM only affects one request. +For the per-worker `-Xmx` rule of thumb that informs both modes, see xref:pipes/cpu-sizing.adoc#heap-per-worker[Heap per worker]. + == Recovery Behavior When a fatal error occurs (OOM, timeout, or crash): diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java new file mode 100644 index 00000000000..e1802d01029 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/java/org/apache/tika/pipes/atlassianjwt/ConfigExamplesTest.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.atlassianjwt; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.fetcher.atlassianjwt.config.AtlassianJwtFetcherConfig; + +/** + * Validates Atlassian JWT fetcher configuration examples used in documentation. 
+ */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + @Test + public void testAtlassianJwtFetcherConfig() throws Exception { + String json = readExample("atlassian-jwt-fetcher.json"); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + assertNotNull(TikaLoader.load(configFile)); + + JsonNode inner = OBJECT_MAPPER.readTree(json) + .get("fetchers").get("ajwt").get("atlassian-jwt-fetcher"); + AtlassianJwtFetcherConfig config = AtlassianJwtFetcherConfig.load(inner.toString()); + assertEquals("tika-pipes-app-key", config.getIssuer()); + assertNotNull(config.getSharedSecret()); + assertEquals("service-account@example.com", config.getSubject()); + assertEquals(Integer.valueOf(3600), config.getJwtExpiresInSeconds()); + assertEquals(Integer.valueOf(30000), config.getConnectTimeoutMillis()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/resources/config-examples/atlassian-jwt-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/resources/config-examples/atlassian-jwt-fetcher.json new file mode 100644 index 00000000000..01c165a61ec --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/resources/config-examples/atlassian-jwt-fetcher.json @@ -0,0 +1,19 @@ +{ + "fetchers": { + "ajwt": { + "atlassian-jwt-fetcher": { + "issuer": "tika-pipes-app-key", + "sharedSecret": "REDACTED_SHARED_SECRET", + "subject": "service-account@example.com", + "jwtExpiresInSeconds": 3600, + "maxConnections": 2000, + "maxConnectionsPerRoute": 1000, + "connectTimeoutMillis": 30000, + "socketTimeoutMillis": 60000, + "requestTimeoutMillis": 60000, + "overallTimeoutMillis": 120000, + "userAgent": "tika-pipes/1.0" + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob-fetcher.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob-fetcher.xml deleted file mode 100644 index 2aa6ba9a533..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob-fetcher.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - az-blob - - - - - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob.xml deleted file mode 100644 index c1ba42b07b0..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config/tika-config-az-blob.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - az-blob - - - - - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java new file mode 100644 index 00000000000..75ca4429186 --- /dev/null +++ 
b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/csv/ConfigExamplesTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.csv; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.iterator.csv.CSVPipesIteratorConfig; + +/** + * Validates CSV iterator configuration example used in documentation. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + @Test + public void testCsvIteratorConfig() throws Exception { + String json = readExample("csv-pipes-iterator.json"); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + assertNotNull(TikaLoader.load(configFile)); + + JsonNode inner = OBJECT_MAPPER.readTree(json) + .get("pipes-iterator").get("csv-pipes-iterator"); + CSVPipesIteratorConfig config = CSVPipesIteratorConfig.load(inner.toString()); + assertNotNull(config.getCsvPath()); + assertEquals("doc_id", config.getIdColumn()); + assertEquals("source_path", config.getFetchKeyColumn()); + assertEquals("output_path", config.getEmitKeyColumn()); + assertEquals("fsf", config.getFetcherId()); + assertEquals("fse", config.getEmitterId()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/resources/config-examples/csv-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/resources/config-examples/csv-pipes-iterator.json new file mode 100644 index 00000000000..92abb5fb7b4 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/resources/config-examples/csv-pipes-iterator.json @@ -0,0 +1,12 @@ +{ + "pipes-iterator": { + "csv-pipes-iterator": { + "csvPath": "/data/work-items.csv", + "idColumn": "doc_id", + "fetchKeyColumn": "source_path", + "emitKeyColumn": "output_path", + "fetcherId": "fsf", + "emitterId": "fse" + } + } +} diff --git 
a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java new file mode 100644 index 00000000000..b1be5faa4be --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/java/org/apache/tika/pipes/es/ConfigExamplesTest.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.es; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.emitter.es.ESEmitterConfig; +import org.apache.tika.pipes.reporter.es.ESReporterConfig; + +/** + * Validates Elasticsearch emitter/reporter configuration examples used in documentation. + *

+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/} + * and are included directly in the AsciiDoc documentation via the {@code include::} directive. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + private void loadViaTikaLoader(String resourceName) throws Exception { + String json = readExample(resourceName); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + TikaLoader loader = TikaLoader.load(configFile); + assertNotNull(loader, "TikaLoader should not be null for: " + resourceName); + } + + private JsonNode innerComponent(String json, String section, String id, String typeName) + throws Exception { + JsonNode root = OBJECT_MAPPER.readTree(json); + JsonNode sectionNode = root.get(section); + assertNotNull(sectionNode, "Missing section: " + section); + JsonNode idNode = id == null ? sectionNode : sectionNode.get(id); + assertNotNull(idNode, "Missing id: " + id); + JsonNode typed = idNode.get(typeName); + assertNotNull(typed, "Missing type: " + typeName); + return typed; + } + + @Test + public void testESEmitterConfig() throws Exception { + loadViaTikaLoader("es-emitter.json"); + + JsonNode inner = innerComponent(readExample("es-emitter.json"), + "emitters", "ese", "es-emitter"); + ESEmitterConfig config = ESEmitterConfig.load(inner.toString()); + assertEquals("doc_id", config.idField()); + assertEquals(ESEmitterConfig.AttachmentStrategy.PARENT_CHILD, + config.attachmentStrategy()); + assertEquals(ESEmitterConfig.UpdateStrategy.OVERWRITE, config.updateStrategy()); + assertEquals(1000, config.commitWithin()); + assertNotNull(config.apiKey()); + assertNotNull(config.httpClientConfig()); + // The toString override redacts the apiKey value + assertFalse(config.toString().contains(config.apiKey()), + "apiKey must not appear in toString() output"); + } + + @Test + public void testESReporterConfig() throws Exception { + loadViaTikaLoader("es-reporter.json"); + + JsonNode inner = innerComponent(readExample("es-reporter.json"), + "pipes-reporters", null, "es-pipes-reporter"); + ESReporterConfig config = ESReporterConfig.load(inner.toString()); + assertTrue(config.esUrl().contains("tika-status")); + assertEquals("tika_", config.keyPrefix()); + assertTrue(config.includeRouting()); + assertNotNull(config.includes()); + assertTrue(config.includes().contains("PARSE_SUCCESS")); + assertNotNull(config.httpClientConfig()); + } + + @Test + public void testESPipelineConfig() throws Exception { + loadViaTikaLoader("es-pipeline.json"); + + String json = readExample("es-pipeline.json"); + ESEmitterConfig emitter = ESEmitterConfig.load( + innerComponent(json, "emitters", "ese", "es-emitter").toString()); + ESReporterConfig reporter = ESReporterConfig.load( + innerComponent(json, "pipes-reporters", null, "es-pipes-reporter").toString()); + + assertEquals("doc_id", emitter.idField()); + assertNotNull(reporter.httpClientConfig()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-emitter.json 
b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-emitter.json new file mode 100644 index 00000000000..e0f341cd6c3 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-emitter.json @@ -0,0 +1,19 @@ +{ + "emitters": { + "ese": { + "es-emitter": { + "esUrl": "https://es.example.com:9200/tika-docs", + "idField": "doc_id", + "attachmentStrategy": "PARENT_CHILD", + "updateStrategy": "OVERWRITE", + "commitWithin": 1000, + "embeddedFileFieldName": "embedded", + "apiKey": "REDACTED_BASE64_ID_AND_KEY", + "httpClientConfig": { + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-pipeline.json new file mode 100644 index 00000000000..1dce6405d21 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-pipeline.json @@ -0,0 +1,60 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "fsf": { + "file-system-fetcher": { + "basePath": "/data/input", + "extractFileSystemMetadata": false + } + } + }, + "emitters": { + "ese": { + "es-emitter": { + "esUrl": "https://es.example.com:9200/tika-docs", + "idField": "doc_id", + "attachmentStrategy": "PARENT_CHILD", + "updateStrategy": "OVERWRITE", + "commitWithin": 1000, + "embeddedFileFieldName": "embedded", + "apiKey": "REDACTED_BASE64_ID_AND_KEY", + "httpClientConfig": { + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + } + }, + "pipes-iterator": { + "file-system-pipes-iterator": { + "basePath": "/data/input", + "countTotal": true, + "fetcherId": "fsf", + "emitterId": "ese" + } + }, + "pipes-reporters": { + "es-pipes-reporter": { + "esUrl": "https://es.example.com:9200/tika-status", + "includes": ["PARSE_SUCCESS", "PARSE_EXCEPTION", "OOM", "TIMEOUT"], + "keyPrefix": "tika_", + "includeRouting": true, + "apiKey": "REDACTED_BASE64_ID_AND_KEY", + "httpClientConfig": { + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-reporter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-reporter.json new file mode 100644 index 00000000000..4761705035c --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-reporter.json @@ -0,0 +1,15 @@ +{ + "pipes-reporters": { + "es-pipes-reporter": { + "esUrl": "https://es.example.com:9200/tika-status", + "includes": ["PARSE_SUCCESS", "PARSE_EXCEPTION", "OOM", "TIMEOUT"], + "keyPrefix": "tika_", + "includeRouting": true, + "apiKey": "REDACTED_BASE64_ID_AND_KEY", + "httpClientConfig": { + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config/tika-config-gcs.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config/tika-config-gcs.xml deleted file mode 100644 index 2ea06761e51..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config/tika-config-gcs.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - 
gcs - My First Project - tika-tallison-test-bucket - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java new file mode 100644 index 00000000000..7ee99ebca28 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/java/org/apache/tika/pipes/googledrive/ConfigExamplesTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.googledrive; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.fetcher.googledrive.config.GoogleDriveFetcherConfig; + +/** + * Validates Google Drive fetcher configuration examples used in documentation. 
+ */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + @Test + public void testGoogleDriveFetcherConfig() throws Exception { + String json = readExample("google-drive-fetcher.json"); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + assertNotNull(TikaLoader.load(configFile)); + + JsonNode inner = OBJECT_MAPPER.readTree(json) + .get("fetchers").get("gdf").get("google-drive-fetcher"); + GoogleDriveFetcherConfig config = GoogleDriveFetcherConfig.load(inner.toString()); + assertEquals("tika-pipes", config.getApplicationName()); + assertEquals("user@example.com", config.getSubjectUser()); + assertNotNull(config.getServiceAccountKeyBase64()); + assertTrue(config.getScopes().contains("https://www.googleapis.com/auth/drive.readonly")); + assertTrue(config.isSpoolToTemp()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/resources/config-examples/google-drive-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/resources/config-examples/google-drive-fetcher.json new file mode 100644 index 00000000000..441a54ab16d --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/resources/config-examples/google-drive-fetcher.json @@ -0,0 +1,13 @@ +{ + "fetchers": { + "gdf": { + "google-drive-fetcher": { + "applicationName": "tika-pipes", + "serviceAccountKeyBase64": "REDACTED_BASE64_SERVICE_ACCOUNT_JSON", + "subjectUser": "user@example.com", + "scopes": ["https://www.googleapis.com/auth/drive.readonly"], + "spoolToTemp": true + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java new file mode 100644 index 00000000000..ff447df2a3c --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/java/org/apache/tika/pipes/http/ConfigExamplesTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.http; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.fetcher.http.config.HttpFetcherConfig; + +/** + * Validates HTTP fetcher configuration examples used in documentation. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + @Test + public void testHttpFetcherConfig() throws Exception { + String json = readExample("http-fetcher.json"); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + assertNotNull(TikaLoader.load(configFile)); + + JsonNode inner = OBJECT_MAPPER.readTree(json) + .get("fetchers").get("httpf").get("http-fetcher"); + HttpFetcherConfig config = HttpFetcherConfig.load(inner.toString()); + assertEquals("tika", config.getUserName()); + assertEquals("basic", config.getAuthScheme()); + assertEquals(Integer.valueOf(30000), config.getConnectTimeoutMillis()); + assertEquals(Integer.valueOf(5), config.getMaxRedirects()); + assertTrue(config.getHttpHeaders().contains("Accept: application/octet-stream")); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/resources/config-examples/http-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/resources/config-examples/http-fetcher.json new file mode 100644 index 00000000000..4514e226a8b --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/resources/config-examples/http-fetcher.json @@ -0,0 +1,21 @@ +{ + "fetchers": { + "httpf": { + "http-fetcher": { + "userName": "tika", + "password": "REDACTED", + "authScheme": "basic", + "userAgent": "tika-pipes/1.0", + "maxConnections": 2000, + "maxConnectionsPerRoute": 1000, + "connectTimeoutMillis": 30000, + "socketTimeoutMillis": 60000, + "requestTimeoutMillis": 60000, + "overallTimeoutMillis": 120000, + "maxRedirects": 5, + "maxSpoolSize": -1, + "httpHeaders": ["Accept: application/octet-stream"] + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/reporter/jdbc/JDBCPipesReporterConfig.java b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/reporter/jdbc/JDBCPipesReporterConfig.java index 4c329b0fca7..94a7df3dc18 100644 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/reporter/jdbc/JDBCPipesReporterConfig.java +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/main/java/org/apache/tika/pipes/reporter/jdbc/JDBCPipesReporterConfig.java @@ -64,12 +64,27 @@ public static JDBCPipesReporterConfig load(final String json) } @JsonCreator - public 
JDBCPipesReporterConfig(@JsonProperty("connectionString") String connectionString,
-            @JsonProperty("includes") Set<String> includes,
-            @JsonProperty("excludes") Set<String> excludes) {
-        this(connectionString,
+    public static JDBCPipesReporterConfig fromJson(
+            @JsonProperty("connectionString") String connectionString,
+            @JsonProperty("includes") Set<String> includes,
+            @JsonProperty("excludes") Set<String> excludes,
+            @JsonProperty("reportSql") String reportSql,
+            @JsonProperty("tableName") String tableName,
+            @JsonProperty("createTable") Boolean createTable,
+            @JsonProperty("postConnectionSql") String postConnectionSql,
+            @JsonProperty("reportVariables") List<String> reportVariables,
+            @JsonProperty("reportWithinMs") Long reportWithinMs,
+            @JsonProperty("cacheSize") Integer cacheSize) {
+        return new JDBCPipesReporterConfig(
+                connectionString,
                 includes == null ? Set.of() : includes,
-                excludes == null ? Set.of() : excludes, null, JDBCPipesReporter.TABLE_NAME, true,
-                null, List.of(), JDBCPipesReporter.DEFAULT_REPORT_WITHIN_MS, JDBCPipesReporter.DEFAULT_CACHE_SIZE);
+                excludes == null ? Set.of() : excludes,
+                reportSql,
+                tableName == null ? JDBCPipesReporter.TABLE_NAME : tableName,
+                createTable == null ? true : createTable,
+                postConnectionSql,
+                reportVariables == null ? List.of() : reportVariables,
+                reportWithinMs == null ? JDBCPipesReporter.DEFAULT_REPORT_WITHIN_MS : reportWithinMs,
+                cacheSize == null ? JDBCPipesReporter.DEFAULT_CACHE_SIZE : cacheSize);
+    }
 }
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java
new file mode 100644
index 00000000000..05b657362c0
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/java/org/apache/tika/pipes/jdbc/ConfigExamplesTest.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.tika.pipes.jdbc; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.emitter.jdbc.JDBCEmitterConfig; +import org.apache.tika.pipes.iterator.jdbc.JDBCPipesIteratorConfig; +import org.apache.tika.pipes.reporter.jdbc.JDBCPipesReporterConfig; + +/** + * Validates JDBC emitter/iterator/reporter configuration examples used in documentation. + *

+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/} + * and are included directly in the AsciiDoc documentation via the {@code include::} directive. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + private void loadViaTikaLoader(String resourceName) throws Exception { + String json = readExample(resourceName); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + TikaLoader loader = TikaLoader.load(configFile); + assertNotNull(loader, "TikaLoader should not be null for: " + resourceName); + } + + private JsonNode innerComponent(String json, String section, String id, String typeName) + throws Exception { + JsonNode root = OBJECT_MAPPER.readTree(json); + JsonNode sectionNode = root.get(section); + assertNotNull(sectionNode, "Missing section: " + section); + JsonNode idNode = id == null ? sectionNode : sectionNode.get(id); + assertNotNull(idNode, "Missing id: " + id); + JsonNode typed = idNode.get(typeName); + assertNotNull(typed, "Missing type: " + typeName); + return typed; + } + + @Test + public void testJDBCEmitterConfig() throws Exception { + loadViaTikaLoader("jdbc-emitter.json"); + + JsonNode inner = innerComponent(readExample("jdbc-emitter.json"), + "emitters", "jdbce", "jdbc-emitter"); + JDBCEmitterConfig config = JDBCEmitterConfig.load(inner.toString()); + assertTrue(config.connection().startsWith("jdbc:h2:")); + assertTrue(config.insert().contains("insert into parsed_docs")); + assertNotNull(config.keys()); + assertEquals(4, config.keys().size()); + assertEquals("string", config.keys().get("dc:title")); + assertEquals("timestamp", config.keys().get("dcterms:modified")); + config.validate(); + assertEquals(JDBCEmitterConfig.AttachmentStrategy.FIRST_ONLY, + config.getAttachmentStrategyEnum()); + assertEquals(JDBCEmitterConfig.MultivaluedFieldStrategy.CONCATENATE, + config.getMultivaluedFieldStrategyEnum()); + } + + @Test + public void testJDBCIteratorConfig() throws Exception { + loadViaTikaLoader("jdbc-pipes-iterator.json"); + + JsonNode inner = innerComponent(readExample("jdbc-pipes-iterator.json"), + "pipes-iterator", null, "jdbc-pipes-iterator"); + JDBCPipesIteratorConfig config = JDBCPipesIteratorConfig.load(inner.toString()); + assertTrue(config.getConnection().startsWith("jdbc:h2:")); + assertTrue(config.getSelect().contains("docs_to_parse")); + assertEquals("id", config.getIdColumn()); + assertEquals("source_path", config.getFetchKeyColumn()); + assertEquals("output_path", config.getEmitKeyColumn()); + assertEquals(1000, config.getFetchSize()); + assertEquals(60, config.getQueryTimeoutSeconds()); + assertEquals("fsf", config.getFetcherId()); + assertEquals("jdbce", config.getEmitterId()); + } + + @Test + public void testJDBCReporterConfig() throws Exception { + loadViaTikaLoader("jdbc-reporter.json"); + + JsonNode inner = innerComponent(readExample("jdbc-reporter.json"), + "pipes-reporters", null, "jdbc-reporter"); + JDBCPipesReporterConfig config = JDBCPipesReporterConfig.load(inner.toString()); + 
assertTrue(config.connectionString().startsWith("jdbc:h2:")); + assertNotNull(config.includes()); + assertTrue(config.includes().contains("PARSE_SUCCESS")); + // Fields previously unreachable from JSON — see JDBCPipesReporterConfig.fromJson + assertEquals("tika_reporter_status", config.tableName()); + assertEquals(false, config.createTable()); + assertEquals(5000L, config.reportWithinMs()); + assertEquals(500, config.cacheSize()); + } + + @Test + public void testJDBCPipelineConfig() throws Exception { + loadViaTikaLoader("jdbc-pipeline.json"); + + String json = readExample("jdbc-pipeline.json"); + JDBCEmitterConfig emitter = JDBCEmitterConfig.load( + innerComponent(json, "emitters", "jdbce", "jdbc-emitter").toString()); + emitter.validate(); + JDBCPipesIteratorConfig iterator = JDBCPipesIteratorConfig.load( + innerComponent(json, "pipes-iterator", null, "jdbc-pipes-iterator").toString()); + JDBCPipesReporterConfig reporter = JDBCPipesReporterConfig.load( + innerComponent(json, "pipes-reporters", null, "jdbc-reporter").toString()); + + assertEquals("jdbce", iterator.getEmitterId()); + assertNotNull(reporter.connectionString()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-emitter.json new file mode 100644 index 00000000000..c2e22dcdd24 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-emitter.json @@ -0,0 +1,22 @@ +{ + "emitters": { + "jdbce": { + "jdbc-emitter": { + "connection": "jdbc:h2:mem:tika;DB_CLOSE_DELAY=-1", + "createTable": "create table parsed_docs (path varchar(512) primary key, title varchar(1024), author varchar(512), content_length bigint, modified timestamp)", + "insert": "insert into parsed_docs (path, title, author, content_length, modified) values (?,?,?,?,?)", + "keys": { + "dc:title": "string", + "dc:creator": "string", + "Content-Length": "long", + "dcterms:modified": "timestamp" + }, + "maxRetries": 0, + "maxStringLength": 64000, + "attachmentStrategy": "FIRST_ONLY", + "multivaluedFieldStrategy": "CONCATENATE", + "multivaluedFieldDelimiter": ", " + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipeline.json new file mode 100644 index 00000000000..15e512e8549 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipeline.json @@ -0,0 +1,56 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "fsf": { + "file-system-fetcher": { + "basePath": "/data/input", + "extractFileSystemMetadata": false + } + } + }, + "emitters": { + "jdbce": { + "jdbc-emitter": { + "connection": "jdbc:h2:mem:tika;DB_CLOSE_DELAY=-1", + "createTable": "create table parsed_docs (path varchar(512) primary key, title varchar(1024), author varchar(512), content_length bigint, modified timestamp)", + "insert": "insert into parsed_docs (path, title, author, content_length, modified) values (?,?,?,?,?)", + "keys": { + "dc:title": "string", + "dc:creator": "string", + "Content-Length": "long", + "dcterms:modified": "timestamp" + }, + "attachmentStrategy": "FIRST_ONLY", + "multivaluedFieldStrategy": "CONCATENATE" + } + } + }, + "pipes-iterator": { + 
"jdbc-pipes-iterator": { + "connection": "jdbc:h2:mem:tika;DB_CLOSE_DELAY=-1", + "select": "select id, source_path, output_path from docs_to_parse where status = 'PENDING'", + "idColumn": "id", + "fetchKeyColumn": "source_path", + "emitKeyColumn": "output_path", + "fetcherId": "fsf", + "emitterId": "jdbce" + } + }, + "pipes-reporters": { + "jdbc-reporter": { + "connectionString": "jdbc:h2:mem:tika;DB_CLOSE_DELAY=-1", + "includes": ["PARSE_SUCCESS", "PARSE_EXCEPTION", "OOM", "TIMEOUT"] + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipes-iterator.json new file mode 100644 index 00000000000..85003cfbb61 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipes-iterator.json @@ -0,0 +1,15 @@ +{ + "pipes-iterator": { + "jdbc-pipes-iterator": { + "connection": "jdbc:h2:mem:tika;DB_CLOSE_DELAY=-1", + "select": "select id, source_path, output_path from docs_to_parse where status = 'PENDING'", + "idColumn": "id", + "fetchKeyColumn": "source_path", + "emitKeyColumn": "output_path", + "fetchSize": 1000, + "queryTimeoutSeconds": 60, + "fetcherId": "fsf", + "emitterId": "jdbce" + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-reporter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-reporter.json new file mode 100644 index 00000000000..ccc3fd6709d --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-reporter.json @@ -0,0 +1,12 @@ +{ + "pipes-reporters": { + "jdbc-reporter": { + "connectionString": "jdbc:h2:mem:tika;DB_CLOSE_DELAY=-1", + "includes": ["PARSE_SUCCESS", "PARSE_EXCEPTION", "OOM", "TIMEOUT"], + "tableName": "tika_reporter_status", + "createTable": false, + "reportWithinMs": 5000, + "cacheSize": 500 + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml deleted file mode 100644 index 4bc2d8e875f..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - - - jdbc - CONNECTION_STRING - create table test - (path varchar(512) not null, - attachment_num integer not null, - k1 boolean, - k2 varchar(512), - k3 integer, - k4 long); - - alter table test add primary key (path, attachment_num) - - insert into test (path, attachment_num, k1, k2, k3, k4) values (?,?,?,?,?,?); - - - - - - - - - all - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml deleted file mode 100644 index 654b279be7d..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - jdbc - CONNECTION_STRING - insert into test (path, k1, k2, k3, k4) values (?,?,?,?,?); - - - - - - - - - first_only - - - \ No newline at end of 
file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml deleted file mode 100644 index eb966b54a0e..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml +++ /dev/null @@ -1,45 +0,0 @@ - - - - - - jdbc - CONNECTION_STRING - create table test - (path varchar(512) primary key, - k1 varchar(512)); - - - insert into test (path, k1) values (?,?); - - - - - - concatenate - , - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-trunc.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-trunc.xml deleted file mode 100644 index 85eef281c2d..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-trunc.xml +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - jdbc - CONNECTION_STRING - create table test - (path varchar(512) primary key, - k1 varchar(12)); - - - insert into test (path, k1) values (?,?); - - - - - - first_only - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml deleted file mode 100644 index c1a05bdec49..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - jdbc - CONNECTION_STRING - create table test - (path varchar(512) primary key, - k1 boolean, - k2 varchar(512), - k3 integer, - k4 long, - k5 bigint, - k6 timestamp); - - - insert into test (path, k1, k2, k3, k4, k5, k6) values (?,?,?,?,?,?,?); - - - - - - - - - - - first_only - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java new file mode 100644 index 00000000000..d96140eae50 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/java/org/apache/tika/pipes/json/ConfigExamplesTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.pipes.json; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.pipesiterator.json.JsonPipesIteratorConfig; + +/** + * Validates JSON iterator configuration example used in documentation. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + @Test + public void testJsonIteratorConfig() throws Exception { + String json = readExample("json-pipes-iterator.json"); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + assertNotNull(TikaLoader.load(configFile)); + + JsonNode inner = OBJECT_MAPPER.readTree(json) + .get("pipes-iterator").get("json-pipes-iterator"); + JsonPipesIteratorConfig config = JsonPipesIteratorConfig.load(inner.toString()); + assertNotNull(config.getJsonPath()); + assertEquals("fsf", config.getFetcherId()); + assertEquals("fse", config.getEmitterId()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/resources/config-examples/json-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/resources/config-examples/json-pipes-iterator.json new file mode 100644 index 00000000000..5bc4732bf69 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/resources/config-examples/json-pipes-iterator.json @@ -0,0 +1,9 @@ +{ + "pipes-iterator": { + "json-pipes-iterator": { + "jsonPath": "/data/work-items.jsonl", + "fetcherId": "fsf", + "emitterId": "fse" + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java new file mode 100644 index 00000000000..43c9a4daefb --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java
new file mode 100644
index 00000000000..43c9a4daefb
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/java/org/apache/tika/pipes/kafka/ConfigExamplesTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.kafka;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.emitter.kafka.KafkaEmitterConfig;
+import org.apache.tika.pipes.iterator.kafka.KafkaPipesIteratorConfig;
+
+/**
+ * Validates Kafka emitter/iterator configuration examples used in documentation.
+ * <p>
+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/}
+ * and are included directly in the AsciiDoc documentation via the {@code include::} directive.
+ */
+public class ConfigExamplesTest {
+
+    private static final String EXAMPLES_DIR = "/config-examples/";
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    @TempDir
+    Path tempDir;
+
+    private String readExample(String resourceName) throws Exception {
+        try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) {
+            assertNotNull(is, "Resource not found: " + resourceName);
+            return new String(is.readAllBytes(), StandardCharsets.UTF_8);
+        }
+    }
+
+    private void loadViaTikaLoader(String resourceName) throws Exception {
+        String json = readExample(resourceName);
+        Path configFile = tempDir.resolve("tika-config.json");
+        Files.writeString(configFile, json, StandardCharsets.UTF_8);
+        TikaLoader loader = TikaLoader.load(configFile);
+        assertNotNull(loader, "TikaLoader should not be null for: " + resourceName);
+    }
+
+    private JsonNode innerComponent(String json, String section, String id, String typeName)
+            throws Exception {
+        JsonNode root = OBJECT_MAPPER.readTree(json);
+        JsonNode sectionNode = root.get(section);
+        assertNotNull(sectionNode, "Missing section: " + section);
+        JsonNode idNode = id == null ? sectionNode : sectionNode.get(id);
+        assertNotNull(idNode, "Missing id: " + id);
+        JsonNode typed = idNode.get(typeName);
+        assertNotNull(typed, "Missing type: " + typeName);
+        return typed;
+    }
+
+    @Test
+    public void testKafkaEmitterConfig() throws Exception {
+        loadViaTikaLoader("kafka-emitter.json");
+
+        JsonNode inner = innerComponent(readExample("kafka-emitter.json"),
+                "emitters", "kafe", "kafka-emitter");
+        KafkaEmitterConfig config = KafkaEmitterConfig.load(inner.toString());
+        assertEquals("tika-parsed-docs", config.topic());
+        assertTrue(config.bootstrapServers().contains("kafka1.example.com"));
+        assertEquals("all", config.acks());
+        assertEquals("lz4", config.compressionType());
+        assertTrue(config.enableIdempotence());
+        config.validate();
+    }
+
+    @Test
+    public void testKafkaIteratorConfig() throws Exception {
+        loadViaTikaLoader("kafka-pipes-iterator.json");
+
+        JsonNode inner = innerComponent(readExample("kafka-pipes-iterator.json"),
+                "pipes-iterator", null, "kafka-pipes-iterator");
+        KafkaPipesIteratorConfig config = KafkaPipesIteratorConfig.load(inner.toString());
+        assertEquals("tika-fetch-requests", config.getTopic());
+        assertEquals("tika-pipes-iterator", config.getGroupId());
+        assertEquals("earliest", config.getAutoOffsetReset());
+        assertEquals(100, config.getPollDelayMs());
+        assertEquals(-1, config.getEmitMax());
+        assertEquals("fsf", config.getFetcherId());
+        assertEquals("kafe", config.getEmitterId());
+    }
+
+    @Test
+    public void testKafkaPipelineConfig() throws Exception {
+        loadViaTikaLoader("kafka-pipeline.json");
+
+        String json = readExample("kafka-pipeline.json");
+        KafkaEmitterConfig emitter = KafkaEmitterConfig.load(
+                innerComponent(json, "emitters", "kafe", "kafka-emitter").toString());
+        emitter.validate();
+        assertEquals("tika-parsed-docs", emitter.topic());
+    }
+}
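Editor's sketch: as the javadoc above notes, these JSON examples are pulled into the AsciiDoc pages with the `include::` directive rather than being copy-pasted, so the docs can never drift from what the tests validate. A minimal sketch of such an include, assuming an Antora `example$` resource alias has been mapped to the `config-examples` directory (that alias mapping is an assumption):

[source,asciidoc]
----
include::example$config-examples/kafka-emitter.json[]
----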
b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-emitter.json @@ -0,0 +1,19 @@ +{ + "emitters": { + "kafe": { + "kafka-emitter": { + "topic": "tika-parsed-docs", + "bootstrapServers": "kafka1.example.com:9092,kafka2.example.com:9092", + "acks": "all", + "lingerMs": 5000, + "batchSize": 16384, + "compressionType": "lz4", + "enableIdempotence": true, + "maxRequestSize": 1048576, + "requestTimeoutMs": 30000, + "deliveryTimeoutMs": 120000, + "clientId": "tika-pipes-emitter" + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipeline.json new file mode 100644 index 00000000000..01e5a528d48 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipeline.json @@ -0,0 +1,43 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "fsf": { + "file-system-fetcher": { + "basePath": "/data/input", + "extractFileSystemMetadata": false + } + } + }, + "emitters": { + "kafe": { + "kafka-emitter": { + "topic": "tika-parsed-docs", + "bootstrapServers": "kafka1.example.com:9092", + "acks": "all", + "compressionType": "lz4", + "enableIdempotence": true + } + } + }, + "pipes-iterator": { + "kafka-pipes-iterator": { + "topic": "tika-fetch-requests", + "bootstrapServers": "kafka1.example.com:9092", + "groupId": "tika-pipes-iterator", + "autoOffsetReset": "earliest", + "fetcherId": "fsf", + "emitterId": "kafe" + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipes-iterator.json new file mode 100644 index 00000000000..5685476084f --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipes-iterator.json @@ -0,0 +1,14 @@ +{ + "pipes-iterator": { + "kafka-pipes-iterator": { + "topic": "tika-fetch-requests", + "bootstrapServers": "kafka1.example.com:9092,kafka2.example.com:9092", + "groupId": "tika-pipes-iterator", + "autoOffsetReset": "earliest", + "pollDelayMs": 100, + "emitMax": -1, + "fetcherId": "fsf", + "emitterId": "kafe" + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java new file mode 100644 index 00000000000..83159ba65ba --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java
new file mode 100644
index 00000000000..83159ba65ba
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/java/org/apache/tika/pipes/microsoftgraph/ConfigExamplesTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.microsoftgraph;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.pipes.fetchers.microsoftgraph.config.MicrosoftGraphFetcherConfig;
+
+/**
+ * Validates Microsoft Graph fetcher configuration examples used in documentation.
+ */
+public class ConfigExamplesTest {
+
+    private static final String EXAMPLES_DIR = "/config-examples/";
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    @TempDir
+    Path tempDir;
+
+    private String readExample(String resourceName) throws Exception {
+        try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) {
+            assertNotNull(is, "Resource not found: " + resourceName);
+            return new String(is.readAllBytes(), StandardCharsets.UTF_8);
+        }
+    }
+
+    @Test
+    public void testMicrosoftGraphFetcherConfig() throws Exception {
+        String json = readExample("microsoft-graph-fetcher.json");
+        Path configFile = tempDir.resolve("tika-config.json");
+        Files.writeString(configFile, json, StandardCharsets.UTF_8);
+        assertNotNull(TikaLoader.load(configFile));
+
+        JsonNode inner = OBJECT_MAPPER.readTree(json)
+                .get("fetchers").get("msgf").get("microsoft-graph-fetcher");
+        MicrosoftGraphFetcherConfig config = MicrosoftGraphFetcherConfig.load(inner.toString());
+        assertNotNull(config.getClientSecretCredentialsConfig());
+        assertEquals("REDACTED-TENANT-UUID",
+                config.getClientSecretCredentialsConfig().getTenantId());
+        assertEquals("REDACTED-CLIENT-UUID",
+                config.getClientSecretCredentialsConfig().getClientId());
+        assertTrue(config.getScopes().contains("https://graph.microsoft.com/.default"));
+        assertTrue(config.isSpoolToTemp());
+    }
+}
diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/resources/config-examples/microsoft-graph-fetcher.json b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/resources/config-examples/microsoft-graph-fetcher.json
new file mode 100644
index 00000000000..5fd5ea9a436
--- /dev/null
+++ b/tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/resources/config-examples/microsoft-graph-fetcher.json
@@ -0,0 +1,15 @@
+{
+  "fetchers": {
+    "msgf": {
+      "microsoft-graph-fetcher": {
+        "clientSecretCredentialsConfig": {
+          "tenantId": "REDACTED-TENANT-UUID",
+          "clientId": "REDACTED-CLIENT-UUID",
+          "clientSecret": "REDACTED"
+        },
+        "scopes": ["https://graph.microsoft.com/.default"],
+        "spoolToTemp": true
+      }
+    }
+  }
+}
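Editor's sketch: the `clientSecretCredentialsConfig` block corresponds to Azure AD's client-credentials flow. For orientation, the equivalent credential built by hand with the `azure-identity` library looks like the fragment below — shown only to clarify what the three config fields map to; the fetcher's actual internal wiring may differ.

[source,java]
----
import com.azure.identity.ClientSecretCredential;
import com.azure.identity.ClientSecretCredentialBuilder;

// Builds the same kind of credential the fetcher derives from
// clientSecretCredentialsConfig (tenantId / clientId / clientSecret).
ClientSecretCredential credential = new ClientSecretCredentialBuilder()
        .tenantId("REDACTED-TENANT-UUID")
        .clientId("REDACTED-CLIENT-UUID")
        .clientSecret("REDACTED")
        .build();
----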
b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java new file mode 100644 index 00000000000..d0c0a9eefa4 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/java/org/apache/tika/pipes/opensearch/ConfigExamplesTest.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.opensearch; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.emitter.opensearch.OpenSearchEmitterConfig; +import org.apache.tika.pipes.reporter.opensearch.OpenSearchReporterConfig; + +/** + * Validates OpenSearch emitter/reporter configuration examples used in documentation. + *

+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/} + * and are included directly in the AsciiDoc documentation via the {@code include::} directive. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + private void loadViaTikaLoader(String resourceName) throws Exception { + String json = readExample(resourceName); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + TikaLoader loader = TikaLoader.load(configFile); + assertNotNull(loader, "TikaLoader should not be null for: " + resourceName); + } + + private JsonNode innerComponent(String json, String section, String id, String typeName) + throws Exception { + JsonNode root = OBJECT_MAPPER.readTree(json); + JsonNode sectionNode = root.get(section); + assertNotNull(sectionNode, "Missing section: " + section); + JsonNode idNode = id == null ? sectionNode : sectionNode.get(id); + assertNotNull(idNode, "Missing id: " + id); + JsonNode typed = idNode.get(typeName); + assertNotNull(typed, "Missing type: " + typeName); + return typed; + } + + @Test + public void testOpenSearchEmitterConfig() throws Exception { + loadViaTikaLoader("opensearch-emitter.json"); + + JsonNode inner = innerComponent(readExample("opensearch-emitter.json"), + "emitters", "ose", "opensearch-emitter"); + OpenSearchEmitterConfig config = OpenSearchEmitterConfig.load(inner.toString()); + assertEquals("doc_id", config.idField()); + assertEquals(OpenSearchEmitterConfig.AttachmentStrategy.PARENT_CHILD, + config.attachmentStrategy()); + assertEquals(OpenSearchEmitterConfig.UpdateStrategy.OVERWRITE, + config.updateStrategy()); + assertEquals(1000, config.commitWithin()); + assertNotNull(config.httpClientConfig()); + assertEquals("admin", config.httpClientConfig().userName()); + } + + @Test + public void testOpenSearchReporterConfig() throws Exception { + loadViaTikaLoader("opensearch-reporter.json"); + + JsonNode inner = innerComponent(readExample("opensearch-reporter.json"), + "pipes-reporters", null, "opensearch-pipes-reporter"); + OpenSearchReporterConfig config = OpenSearchReporterConfig.load(inner.toString()); + assertTrue(config.openSearchUrl().contains("tika-status")); + assertEquals("tika_", config.keyPrefix()); + assertTrue(config.includeRouting()); + assertNotNull(config.includes()); + assertTrue(config.includes().contains("PARSE_SUCCESS")); + assertNotNull(config.httpClientConfig()); + } + + @Test + public void testOpenSearchPipelineConfig() throws Exception { + loadViaTikaLoader("opensearch-pipeline.json"); + + String json = readExample("opensearch-pipeline.json"); + OpenSearchEmitterConfig emitter = OpenSearchEmitterConfig.load( + innerComponent(json, "emitters", "ose", "opensearch-emitter").toString()); + OpenSearchReporterConfig reporter = OpenSearchReporterConfig.load( + innerComponent(json, "pipes-reporters", null, "opensearch-pipes-reporter").toString()); + + assertEquals("doc_id", emitter.idField()); + assertNotNull(reporter.httpClientConfig()); + } +} diff --git 
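Editor's sketch: the `PARENT_CHILD` attachment strategy asserted above indexes embedded documents as children of their container, which in OpenSearch requires a `join` field in the target index mapping. A plausible sketch of the body for `PUT /tika-docs` follows; the field name `relation_type` and the `container`/`embedded` relation names are assumptions and must match whatever the emitter actually writes.

[source,json]
----
{
  "mappings": {
    "properties": {
      "relation_type": {
        "type": "join",
        "relations": { "container": "embedded" }
      }
    }
  }
}
----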
a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-emitter.json new file mode 100644 index 00000000000..0221fbfd617 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-emitter.json @@ -0,0 +1,21 @@ +{ + "emitters": { + "ose": { + "opensearch-emitter": { + "openSearchUrl": "https://opensearch.example.com:9200/tika-docs", + "idField": "doc_id", + "attachmentStrategy": "PARENT_CHILD", + "updateStrategy": "OVERWRITE", + "commitWithin": 1000, + "embeddedFileFieldName": "embedded", + "httpClientConfig": { + "userName": "admin", + "password": "REDACTED", + "authScheme": "basic", + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-pipeline.json new file mode 100644 index 00000000000..1f196caf88b --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-pipeline.json @@ -0,0 +1,64 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "fsf": { + "file-system-fetcher": { + "basePath": "/data/input", + "extractFileSystemMetadata": false + } + } + }, + "emitters": { + "ose": { + "opensearch-emitter": { + "openSearchUrl": "https://opensearch.example.com:9200/tika-docs", + "idField": "doc_id", + "attachmentStrategy": "PARENT_CHILD", + "updateStrategy": "OVERWRITE", + "commitWithin": 1000, + "embeddedFileFieldName": "embedded", + "httpClientConfig": { + "userName": "admin", + "password": "REDACTED", + "authScheme": "basic", + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + } + }, + "pipes-iterator": { + "file-system-pipes-iterator": { + "basePath": "/data/input", + "countTotal": true, + "fetcherId": "fsf", + "emitterId": "ose" + } + }, + "pipes-reporters": { + "opensearch-pipes-reporter": { + "openSearchUrl": "https://opensearch.example.com:9200/tika-status", + "includes": ["PARSE_SUCCESS", "PARSE_EXCEPTION", "OOM", "TIMEOUT"], + "keyPrefix": "tika_", + "includeRouting": true, + "httpClientConfig": { + "userName": "admin", + "password": "REDACTED", + "authScheme": "basic", + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-reporter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-reporter.json new file mode 100644 index 00000000000..dcce3ce10bf --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-reporter.json @@ -0,0 +1,17 @@ +{ + "pipes-reporters": { + "opensearch-pipes-reporter": { + "openSearchUrl": "https://opensearch.example.com:9200/tika-status", + "includes": ["PARSE_SUCCESS", "PARSE_EXCEPTION", "OOM", "TIMEOUT"], + "keyPrefix": "tika_", + "includeRouting": true, + "httpClientConfig": { + "userName": "admin", + "password": "REDACTED", + "authScheme": "basic", + 
"connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/tika-config-simple-emitter.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/tika-config-simple-emitter.xml deleted file mode 100644 index f6530a9e928..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/tika-config-simple-emitter.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - - - - - - opensearch1 - http://localhost:9200/tika-test - concatenate-content - content - 10 - - - \ No newline at end of file diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java new file mode 100644 index 00000000000..65d06c37cc9 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/solr/ConfigExamplesTest.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.pipes.solr; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import org.apache.tika.config.loader.TikaLoader; +import org.apache.tika.pipes.emitter.solr.SolrEmitterConfig; +import org.apache.tika.pipes.iterator.solr.SolrPipesIteratorConfig; + +/** + * Validates Solr emitter/iterator configuration examples used in documentation. + *

+ * The JSON configuration examples are stored in {@code src/test/resources/config-examples/} + * and are included directly in the AsciiDoc documentation via the {@code include::} directive. + */ +public class ConfigExamplesTest { + + private static final String EXAMPLES_DIR = "/config-examples/"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @TempDir + Path tempDir; + + private String readExample(String resourceName) throws Exception { + try (InputStream is = getClass().getResourceAsStream(EXAMPLES_DIR + resourceName)) { + assertNotNull(is, "Resource not found: " + resourceName); + return new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + } + + private void loadViaTikaLoader(String resourceName) throws Exception { + String json = readExample(resourceName); + Path configFile = tempDir.resolve("tika-config.json"); + Files.writeString(configFile, json, StandardCharsets.UTF_8); + TikaLoader loader = TikaLoader.load(configFile); + assertNotNull(loader, "TikaLoader should not be null for: " + resourceName); + } + + private JsonNode innerComponent(String json, String section, String id, String typeName) + throws Exception { + JsonNode root = OBJECT_MAPPER.readTree(json); + JsonNode sectionNode = root.get(section); + assertNotNull(sectionNode, "Missing section: " + section); + JsonNode idNode = id == null ? sectionNode : sectionNode.get(id); + assertNotNull(idNode, "Missing id: " + id); + JsonNode typed = idNode.get(typeName); + assertNotNull(typed, "Missing type: " + typeName); + return typed; + } + + @Test + public void testSolrEmitterUrlsConfig() throws Exception { + loadViaTikaLoader("solr-emitter.json"); + + JsonNode inner = innerComponent(readExample("solr-emitter.json"), + "emitters", "solre", "solr-emitter"); + SolrEmitterConfig config = SolrEmitterConfig.load(inner.toString()); + assertEquals("tika-docs", config.solrCollection()); + assertNotNull(config.solrUrls()); + assertEquals(2, config.solrUrls().size()); + assertTrue(config.solrZkHosts() == null || config.solrZkHosts().isEmpty()); + config.validate(); + assertEquals(SolrEmitterConfig.AttachmentStrategy.PARENT_CHILD, + config.getAttachmentStrategyEnum()); + assertEquals(SolrEmitterConfig.UpdateStrategy.ADD, config.getUpdateStrategyEnum()); + } + + @Test + public void testSolrEmitterZkConfig() throws Exception { + loadViaTikaLoader("solr-emitter-zk.json"); + + JsonNode inner = innerComponent(readExample("solr-emitter-zk.json"), + "emitters", "solre", "solr-emitter"); + SolrEmitterConfig config = SolrEmitterConfig.load(inner.toString()); + assertEquals("tika-docs", config.solrCollection()); + assertNotNull(config.solrZkHosts()); + assertEquals(3, config.solrZkHosts().size()); + assertEquals("/solr", config.solrZkChroot()); + assertTrue(config.solrUrls() == null || config.solrUrls().isEmpty()); + config.validate(); + } + + @Test + public void testSolrIteratorConfig() throws Exception { + loadViaTikaLoader("solr-pipes-iterator.json"); + + JsonNode inner = innerComponent(readExample("solr-pipes-iterator.json"), + "pipes-iterator", null, "solr-pipes-iterator"); + SolrPipesIteratorConfig config = SolrPipesIteratorConfig.load(inner.toString()); + assertEquals("tika-docs", config.getSolrCollection()); + assertEquals(5000, config.getRows()); + assertTrue(config.getFilters().contains("status:NEEDS_REPARSE")); + assertEquals("fsf", config.getFetcherId()); + assertEquals("solre", config.getEmitterId()); + } + + @Test + public void testSolrPipelineConfig() throws Exception { + 
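Editor's sketch: with `keyPrefix` set to `tika_`, the reporter writes status fields into the `tika-status` index under prefixed names; `tika_parse_status` below is an assumed field name for illustration only. A quick way to check for parse failures after a run:

[source,console]
----
curl -u admin:REDACTED \
  "https://opensearch.example.com:9200/tika-status/_search?q=tika_parse_status:PARSE_EXCEPTION&size=10"
----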
loadViaTikaLoader("solr-pipeline.json"); + + String json = readExample("solr-pipeline.json"); + SolrEmitterConfig emitter = SolrEmitterConfig.load( + innerComponent(json, "emitters", "solre", "solr-emitter").toString()); + emitter.validate(); + assertEquals("tika-docs", emitter.solrCollection()); + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter-zk.json b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter-zk.json new file mode 100644 index 00000000000..5cfac99011c --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter-zk.json @@ -0,0 +1,15 @@ +{ + "emitters": { + "solre": { + "solr-emitter": { + "solrCollection": "tika-docs", + "solrZkHosts": ["zk1.example.com:2181", "zk2.example.com:2181", "zk3.example.com:2181"], + "solrZkChroot": "/solr", + "idField": "id", + "commitWithin": 1000, + "attachmentStrategy": "PARENT_CHILD", + "updateStrategy": "ADD" + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter.json b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter.json new file mode 100644 index 00000000000..fb88fd79c13 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter.json @@ -0,0 +1,17 @@ +{ + "emitters": { + "solre": { + "solr-emitter": { + "solrCollection": "tika-docs", + "solrUrls": ["http://solr1.example.com:8983/solr", "http://solr2.example.com:8983/solr"], + "idField": "id", + "commitWithin": 1000, + "attachmentStrategy": "PARENT_CHILD", + "updateStrategy": "ADD", + "embeddedFileFieldName": "embedded", + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000 + } + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipeline.json b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipeline.json new file mode 100644 index 00000000000..21a01c9c030 --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipeline.json @@ -0,0 +1,42 @@ +{ + "content-handler-factory": { + "basic-content-handler-factory": { + "type": "TEXT", + "writeLimit": -1, + "throwOnWriteLimitReached": true + } + }, + "fetchers": { + "fsf": { + "file-system-fetcher": { + "basePath": "/data/input", + "extractFileSystemMetadata": false + } + } + }, + "emitters": { + "solre": { + "solr-emitter": { + "solrCollection": "tika-docs", + "solrUrls": ["http://solr1.example.com:8983/solr"], + "idField": "id", + "commitWithin": 1000, + "attachmentStrategy": "PARENT_CHILD", + "updateStrategy": "ADD" + } + } + }, + "pipes-iterator": { + "file-system-pipes-iterator": { + "basePath": "/data/input", + "countTotal": true, + "fetcherId": "fsf", + "emitterId": "solre" + } + }, + "pipes": { + "parseMode": "RMETA", + "onParseException": "EMIT", + "numClients": 4 + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipes-iterator.json b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipes-iterator.json new file mode 100644 index 00000000000..f6daace233a --- /dev/null +++ b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipes-iterator.json @@ -0,0 +1,15 @@ +{ + "pipes-iterator": { + "solr-pipes-iterator": { + "solrCollection": 
"tika-docs", + "solrUrls": ["http://solr1.example.com:8983/solr"], + "filters": ["status:NEEDS_REPARSE"], + "idField": "id", + "rows": 5000, + "connectionTimeoutMillis": 10000, + "socketTimeoutMillis": 60000, + "fetcherId": "fsf", + "emitterId": "solre" + } + } +} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/tika-config-simple-emitter.xml b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/tika-config-simple-emitter.xml deleted file mode 100644 index 5b14a54415a..00000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/tika-config-simple-emitter.xml +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - - - - - - - - - - - - solr1 - http://localhost:8983/solr/tika-test - concatenate-content - content - 10 - - - solr2 - http://localhost:8983/solr/tika-test - parent-child - content - 10 - - - \ No newline at end of file