apache · tballison · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/docs/modules/ROOT/examples/pipes-atlassian-jwt-fetcher.json b/docs/modules/ROOT/examples/pipes-atlassian-jwt-fetcher.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/resources/config-examples/atlassian-jwt-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-azblob-emitter.json b/docs/modules/ROOT/examples/pipes-azblob-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-azblob-fetcher.json b/docs/modules/ROOT/examples/pipes-azblob-fetcher.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-azblob-iterator.json b/docs/modules/ROOT/examples/pipes-azblob-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-azblob-pipeline.json b/docs/modules/ROOT/examples/pipes-azblob-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-az-blob/src/test/resources/config-examples/az-blob-pipeline.json
diff --git a/docs/modules/ROOT/examples/pipes-config-template.json b/docs/modules/ROOT/examples/pipes-config-template.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-async-cli/src/main/resources/config-template.json
diff --git a/docs/modules/ROOT/examples/pipes-csv-iterator.json b/docs/modules/ROOT/examples/pipes-csv-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/resources/config-examples/csv-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-elasticsearch-emitter.json b/docs/modules/ROOT/examples/pipes-elasticsearch-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-elasticsearch-pipeline.json b/docs/modules/ROOT/examples/pipes-elasticsearch-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-pipeline.json
diff --git a/docs/modules/ROOT/examples/pipes-elasticsearch-reporter.json b/docs/modules/ROOT/examples/pipes-elasticsearch-reporter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-es/src/test/resources/config-examples/es-reporter.json
diff --git a/docs/modules/ROOT/examples/pipes-emit-all.json b/docs/modules/ROOT/examples/pipes-emit-all.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-emit-all.json
diff --git a/docs/modules/ROOT/examples/pipes-fs-emitter.json b/docs/modules/ROOT/examples/pipes-fs-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-fs-fetcher.json b/docs/modules/ROOT/examples/pipes-fs-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-fs-pipeline.json b/docs/modules/ROOT/examples/pipes-fs-pipeline.json
@@ -1 +1 @@
-../../../../tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
+../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
diff --git a/docs/modules/ROOT/examples/pipes-gcs-emitter.json b/docs/modules/ROOT/examples/pipes-gcs-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-gcs-fetcher.json b/docs/modules/ROOT/examples/pipes-gcs-fetcher.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-gcs-iterator.json b/docs/modules/ROOT/examples/pipes-gcs-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-gcs-pipeline.json b/docs/modules/ROOT/examples/pipes-gcs-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-gcs/src/test/resources/config-examples/gcs-pipeline.json
diff --git a/docs/modules/ROOT/examples/pipes-google-drive-fetcher.json b/docs/modules/ROOT/examples/pipes-google-drive-fetcher.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-google-drive/src/test/resources/config-examples/google-drive-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-http-fetcher.json b/docs/modules/ROOT/examples/pipes-http-fetcher.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-http/src/test/resources/config-examples/http-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-jdbc-emitter.json b/docs/modules/ROOT/examples/pipes-jdbc-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-jdbc-iterator.json b/docs/modules/ROOT/examples/pipes-jdbc-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-jdbc-pipeline.json b/docs/modules/ROOT/examples/pipes-jdbc-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-pipeline.json
diff --git a/docs/modules/ROOT/examples/pipes-jdbc-reporter.json b/docs/modules/ROOT/examples/pipes-jdbc-reporter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-jdbc/src/test/resources/config-examples/jdbc-reporter.json
diff --git a/docs/modules/ROOT/examples/pipes-json-iterator.json b/docs/modules/ROOT/examples/pipes-json-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/resources/config-examples/json-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-kafka-emitter.json b/docs/modules/ROOT/examples/pipes-kafka-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-kafka-iterator.json b/docs/modules/ROOT/examples/pipes-kafka-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-kafka-pipeline.json b/docs/modules/ROOT/examples/pipes-kafka-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-kafka/src/test/resources/config-examples/kafka-pipeline.json
diff --git a/docs/modules/ROOT/examples/pipes-microsoft-graph-fetcher.json b/docs/modules/ROOT/examples/pipes-microsoft-graph-fetcher.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-microsoft-graph/src/test/resources/config-examples/microsoft-graph-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-opensearch-emitter.json b/docs/modules/ROOT/examples/pipes-opensearch-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-opensearch-pipeline.json b/docs/modules/ROOT/examples/pipes-opensearch-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-pipeline.json
diff --git a/docs/modules/ROOT/examples/pipes-opensearch-reporter.json b/docs/modules/ROOT/examples/pipes-opensearch-reporter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-opensearch/src/test/resources/config-examples/opensearch-reporter.json
diff --git a/docs/modules/ROOT/examples/pipes-s3-emitter.json b/docs/modules/ROOT/examples/pipes-s3-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-s3-fetcher.json b/docs/modules/ROOT/examples/pipes-s3-fetcher.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-fetcher.json
diff --git a/docs/modules/ROOT/examples/pipes-s3-iterator.json b/docs/modules/ROOT/examples/pipes-s3-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-s3-pipeline.json b/docs/modules/ROOT/examples/pipes-s3-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-s3/src/test/resources/config-examples/s3-pipeline.json
diff --git a/docs/modules/ROOT/examples/pipes-shared-server.json b/docs/modules/ROOT/examples/pipes-shared-server.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-shared-server.json
diff --git a/docs/modules/ROOT/examples/pipes-solr-emitter-zk.json b/docs/modules/ROOT/examples/pipes-solr-emitter-zk.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter-zk.json
diff --git a/docs/modules/ROOT/examples/pipes-solr-emitter.json b/docs/modules/ROOT/examples/pipes-solr-emitter.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-emitter.json
diff --git a/docs/modules/ROOT/examples/pipes-solr-iterator.json b/docs/modules/ROOT/examples/pipes-solr-iterator.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipes-iterator.json
diff --git a/docs/modules/ROOT/examples/pipes-solr-pipeline.json b/docs/modules/ROOT/examples/pipes-solr-pipeline.json
@@ -0,0 +1 @@
+../../../../tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/resources/config-examples/solr-pipeline.json
diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
@@ -31,6 +31,22 @@
 ** xref:pipes/unpack-config.adoc[Extracting Embedded Bytes]
 ** xref:pipes/timeouts.adoc[Timeouts]
 ** xref:pipes/cpu-sizing.adoc[Forked-JVM CPU Sizing]
+** xref:pipes/plugins/index.adoc[Plugins]
+*** xref:pipes/plugins/filesystem.adoc[File System]
+*** xref:pipes/plugins/s3.adoc[Amazon S3]
+*** xref:pipes/plugins/gcs.adoc[Google Cloud Storage]
+*** xref:pipes/plugins/azblob.adoc[Azure Blob Storage]
+*** xref:pipes/plugins/opensearch.adoc[OpenSearch]
+*** xref:pipes/plugins/elasticsearch.adoc[Elasticsearch]
+*** xref:pipes/plugins/solr.adoc[Apache Solr]
+*** xref:pipes/plugins/jdbc.adoc[JDBC]
+*** xref:pipes/plugins/kafka.adoc[Apache Kafka]
+*** xref:pipes/plugins/http.adoc[HTTP]
+*** xref:pipes/plugins/google-drive.adoc[Google Drive]
+*** xref:pipes/plugins/microsoft-graph.adoc[Microsoft Graph]
+*** xref:pipes/plugins/atlassian-jwt.adoc[Atlassian JWT]
+*** xref:pipes/plugins/csv.adoc[CSV]
+*** xref:pipes/plugins/json.adoc[JSON]
 * xref:configuration/index.adoc[Configuration]
 ** xref:configuration/parsers/pdf-parser.adoc[PDF Parser]
 ** xref:configuration/parsers/tesseract-ocr-parser.adoc[Tesseract OCR]

diff --git a/docs/modules/ROOT/pages/pipes/configuration.adoc b/docs/modules/ROOT/pages/pipes/configuration.adoc
@@ -98,7 +98,7 @@ See also xref:pipes/timeouts.adoc[Timeouts] for the full timeout model.
 
 |`parseMode`
 |`RMETA`
-|How embedded documents are handled: `RMETA` (recursive metadata list), `CONCATENATE`, `CONTENT_ONLY`, `UNPACK`. See xref:pipes/parse-modes.adoc[Parse Modes].
+|How embedded documents are handled: `RMETA` (recursive metadata list), `CONCATENATE`, `CONTENT_ONLY`, `NO_PARSE`, `UNPACK`. See xref:pipes/parse-modes.adoc[Parse Modes].
 
 |`onParseException`
 |`EMIT`
@@ -150,3 +150,54 @@ These settings control how parsed results are batched before sending to emitters
 |===
 
 See xref:pipes/shared-server-mode.adoc[Shared Server Mode] for details.
+
+[#complete-examples]
+== Complete examples
+
+Worked-out end-to-end configs taken directly from the source tree. Each is loaded by an automated test, so the syntax stays current.
+
+[#fs-pipeline]
+=== Filesystem-to-filesystem pipeline
+
+[source,json,subs=none]
+----
+include::example$pipes-fs-pipeline.json[]
+----
+
+icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json[View source on GitHub]
+
+Tokens (`FETCHER_BASE_PATH`, `EMITTER_BASE_PATH`, `PLUGINS_PATHS`) are substituted by the test harness — replace them with real paths in production configs.
+
+[#emit-all]
+=== Emit-all variant
+
+[source,json,subs=none]
+----
+include::example$pipes-emit-all.json[]
+----
+
+icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-emit-all.json[View source on GitHub]
+
+[#shared-server-example]
+=== Shared-server (YOLO) mode
+
+[source,json,subs=none]
+----
+include::example$pipes-shared-server.json[]
+----
+
+icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-shared-server.json[View source on GitHub]
+
+See xref:pipes/shared-server-mode.adoc[Shared Server Mode] for the trade-offs.
+
+[#config-template]
+=== `tika-async-cli` config template
+
+[source,json,subs=none]
+----
+include::example$pipes-config-template.json[]
+----
+
+icon:github[] https://github.com/apache/tika/blob/main/tika-pipes/tika-async-cli/src/main/resources/config-template.json[View source on GitHub]
+
+For per-plugin pipeline examples (S3, OpenSearch, JDBC, Kafka, etc.), see the relevant page under xref:pipes/plugins/index.adoc[Plugins].
diff --git a/docs/modules/ROOT/pages/pipes/cpu-sizing.adoc b/docs/modules/ROOT/pages/pipes/cpu-sizing.adoc
@@ -129,6 +129,23 @@ When Tika sees an explicit `-XX:ActiveProcessorCount` in `forkedJvmArgs`, it
 respects your value and skips the auto-injection — the sizing summary will
 report `autoCap=user-set in forkedJvmArgs`.
 
+[#heap-per-worker]
+== Heap per worker — rule of thumb
+
+A reasonable starting point is **~2 GB of heap per forked worker** (passed via `-Xmx2g` in `forkedJvmArgs`). The number falls out of three independent constraints, any of which can dominate:
+
+* **Worst-case PDF parsing.** A handful of pathological PDFs in any reasonably large corpus will allocate hundreds of MB of intermediate object data per document — large image streams, deeply nested form fields, big embedded fonts. Smaller heaps OOM on those documents; larger heaps just let GC clean up between docs.
+* **Embedded-document explosion.** A zip-bomb-shaped office document with thousands of embedded objects multiplies per-doc allocation by the embedding count. The `maxEmbeddedResources` setting caps the count, but each retained object still lives in the heap until the whole tree finishes parsing.
+* **GC headroom.** G1GC behaves poorly above ~85% occupancy. A `-Xmx2g` worker comfortably handles documents that allocate up to ~1.5 GB of live data; with a smaller heap you start trading throughput for memory.
+
+This is a default — not a tuning recommendation. To right-size for your specific corpus:
+
+. Measure peak per-worker live-heap with `-Xlog:gc*` (look at the post-GC working set, not the peak before GC).
+. Pick `-Xmx` ≈ `1.5 × peakLiveHeap` to leave GC headroom.
+. Re-measure under your real concurrency. Embedded-doc-heavy formats (PowerPoint, complex Word) shift this number up; flat text and text-only PDFs shift it down.
+
+The pod-level heap budget is `numClients × per-worker-Xmx + parent-overhead`. On a 16 GB node running `numClients=4` with roughly 1 GB of parent overhead, that's `4 × 2 GB + 1 GB = 9 GB` — comfortably below the node limit, leaving room for the kernel, IO buffers, and enough slack that the pod is not saturated.
+
 == Container & cgroup behavior
 
 The formula uses `Runtime.availableProcessors()` for the host CPU count,
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		../../../../tika-pipes/tika-pipes-plugins/tika-pipes-atlassian-jwt/src/test/resources/config-examples/atlassian-jwt-fetcher.json
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		../../../../tika-pipes/tika-async-cli/src/main/resources/config-template.json
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-emit-all.json
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		../../../../tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
		../../../../tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json