diff --git a/.github/labeler.yml b/.github/labeler.yml index 4d44b76b10..ffe59fb600 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -198,6 +198,11 @@ integration:pgvector: - any-glob-to-any-file: "integrations/pgvector/**/*" - any-glob-to-any-file: ".github/workflows/pgvector.yml" +integration:presidio: + - changed-files: + - any-glob-to-any-file: "integrations/presidio/**/*" + - any-glob-to-any-file: ".github/workflows/presidio.yml" + integration:pinecone: - changed-files: - any-glob-to-any-file: "integrations/pinecone/**/*" diff --git a/.github/workflows/CI_coverage_comment.yml b/.github/workflows/CI_coverage_comment.yml index f4b83385a5..7c80a698cf 100644 --- a/.github/workflows/CI_coverage_comment.yml +++ b/.github/workflows/CI_coverage_comment.yml @@ -43,6 +43,7 @@ on: - "Test / paddleocr" - "Test / pgvector" - "Test / pinecone" + - "Test / presidio" - "Test / pyversity" - "Test / qdrant" - "Test / ragas" diff --git a/.github/workflows/presidio.yml b/.github/workflows/presidio.yml new file mode 100644 index 0000000000..597d5fb208 --- /dev/null +++ b/.github/workflows/presidio.yml @@ -0,0 +1,72 @@ +name: Test / presidio + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/presidio/**" + - "!integrations/presidio/*.md" + - ".github/workflows/presidio.yml" + +defaults: + run: + working-directory: integrations/presidio + +concurrency: + group: presidio-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.10", "3.14"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ 
matrix.python-version }} + + - name: Install Hatch + run: pip install hatch + + - name: Lint + if: matrix.python-version == '3.10' && runner.os == 'Linux' + run: hatch run fmt-check && hatch run test:types + + - name: Run unit tests + run: hatch run test:unit-cov-retry + + - name: Run unit tests with lowest direct dependencies + run: | + hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt + hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt + hatch run test:unit + + - name: Nightly - run unit tests with Haystack main branch + if: github.event_name == 'schedule' + run: | + hatch env prune + hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main + hatch run test:unit + + notify-slack-on-failure: + needs: run + if: failure() && github.event_name == 'schedule' + runs-on: ubuntu-latest + steps: + - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1 + with: + slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }} diff --git a/README.md b/README.md index f52e01b852..32c0508234 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [paddleocr-haystack](integrations/paddleocr/) | Converter | [![PyPI - Version](https://img.shields.io/pypi/v/paddleocr-haystack.svg)](https://pypi.org/project/paddleocr-haystack) | [![Test / paddleocr](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/paddleocr.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/paddleocr.yml) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-paddleocr/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-paddleocr/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-paddleocr-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-paddleocr-combined/htmlcov/index.html) | | [pinecone-haystack](integrations/pinecone/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/pinecone-haystack.svg?color=orange)](https://pypi.org/project/pinecone-haystack) | [![Test / pinecone](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pinecone.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-pinecone/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-pinecone/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-pinecone-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-pinecone-combined/htmlcov/index.html) | | [pgvector-haystack](integrations/pgvector/) | Document Store | [![PyPI - 
Version](https://img.shields.io/pypi/v/pgvector-haystack.svg?color=orange)](https://pypi.org/project/pgvector-haystack) | [![Test / pgvector](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pgvector.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pgvector.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-pgvector/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-pgvector/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-pgvector-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-pgvector-combined/htmlcov/index.html) | +| [presidio-haystack](integrations/presidio/) | Preprocessor | [![PyPI - Version](https://img.shields.io/pypi/v/presidio-haystack.svg)](https://pypi.org/project/presidio-haystack) | [![Test / presidio](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/presidio.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/presidio.yml) | | | | [pyversity-haystack](integrations/pyversity/) | Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/pyversity-haystack.svg)](https://pypi.org/project/pyversity-haystack) | [![Test / pyversity](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pyversity.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/pyversity.yml) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-pyversity/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-pyversity/htmlcov/index.html) | | | [qdrant-haystack](integrations/qdrant/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/qdrant-haystack.svg?color=orange)](https://pypi.org/project/qdrant-haystack) | [![Test / qdrant](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-qdrant/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-qdrant/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-qdrant-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-qdrant-combined/htmlcov/index.html) | | [ragas-haystack](integrations/ragas/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack) | [![Test / ragas](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-ragas/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-ragas/htmlcov/index.html) | | diff --git a/integrations/presidio/README.md b/integrations/presidio/README.md new file mode 100644 index 0000000000..2f9e57089b --- /dev/null +++ b/integrations/presidio/README.md @@ -0,0 +1,10 @@ +# presidio-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/presidio-haystack.svg)](https://pypi.org/project/presidio-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/presidio-haystack.svg)](https://pypi.org/project/presidio-haystack) + +- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/presidio/CHANGELOG.md) + +--- + +Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md). 
diff --git a/integrations/presidio/pydoc/config_docusaurus.yml b/integrations/presidio/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..def818e2a9 --- /dev/null +++ b/integrations/presidio/pydoc/config_docusaurus.yml @@ -0,0 +1,15 @@ +loaders: + - modules: + - haystack_integrations.components.preprocessors.presidio.presidio_document_cleaner + - haystack_integrations.components.preprocessors.presidio.presidio_text_cleaner + - haystack_integrations.components.preprocessors.presidio.presidio_entity_extractor + search_path: [../src] +processors: + - type: filter + documented_only: true + skip_empty_modules: true +renderer: + description: Presidio integration for Haystack + id: integrations-presidio + filename: presidio.md + title: Presidio diff --git a/integrations/presidio/pyproject.toml b/integrations/presidio/pyproject.toml new file mode 100644 index 0000000000..c8e166796f --- /dev/null +++ b/integrations/presidio/pyproject.toml @@ -0,0 +1,167 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "presidio-haystack" +dynamic = ["version"] +description = "Haystack integration for Microsoft Presidio — PII detection and anonymization" +readme = "README.md" +requires-python = ">=3.10" +license = "Apache-2.0" +keywords = ["Haystack", "Presidio", "PII", "anonymization", "privacy", "NLP"] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "haystack-ai>=2.9.0", + 
"presidio-analyzer>=2.2.0", + "presidio-anonymizer>=2.2.0", +] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/presidio#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/presidio" + +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + +[tool.hatch.version] +source = "vcs" +tag-pattern = 'integrations\/presidio-v(?P<version>.*)' + +[tool.hatch.version.raw-options] +root = "../.." +git_describe_command = 'git describe --tags --match="integrations/presidio-v[0-9]*"' + +[tool.hatch.envs.default] +installer = "uv" +dependencies = ["haystack-pydoc-tools", "ruff"] + +[tool.hatch.envs.default.scripts] +docs = ["haystack-pydoc pydoc/config_docusaurus.yml"] +fmt = "ruff check --fix {args}; ruff format {args}" +fmt-check = "ruff check {args} && ruff format --check {args}" + +[tool.hatch.envs.test] +dependencies = [ + "pytest", + "pytest-asyncio", + "pytest-cov", + "pytest-rerunfailures", + "mypy", + "pip", +] + +[tool.hatch.envs.test.scripts] +unit = 'pytest -m "not integration" {args:tests}' +integration = 'pytest -m "integration" {args:tests}' +all = 'pytest {args:tests}' +unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}' +types = "mypy -p haystack_integrations.components.preprocessors.presidio {args}" + +[tool.mypy] +install_types = true +non_interactive = true +check_untyped_defs = true +disallow_incomplete_defs = true + +[[tool.mypy.overrides]] +module = [ + "presidio_analyzer", + "presidio_analyzer.*", + "presidio_anonymizer", + "presidio_anonymizer.*", +] +ignore_missing_imports = true + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "A", + "ANN", + "ARG", + "B", + "C", + "D102", + "D103", + "D205", + "D209", + "D213", + "D417", + "D419", + "DTZ", + "E", + "EM", + "F", + "I", 
+ "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + "B027", + "B008", + "S105", + "S106", + "S107", + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", + "ANN401", +] + +[tool.ruff.lint.isort] +known-first-party = ["haystack_integrations"] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +"tests/**/*" = ["PLR2004", "S101", "TID252", "D", "ANN"] + +[tool.coverage.run] +source = ["haystack_integrations"] +branch = true +relative_files = true +parallel = false + +[tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing = true +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] + +[tool.pytest.ini_options] +addopts = "--strict-markers" +markers = [ + "integration: integration tests", +] +log_cli = true +asyncio_default_fixture_loop_scope = "function" diff --git a/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/__init__.py b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/__init__.py new file mode 100644 index 0000000000..bdaf79cba1 --- /dev/null +++ b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/__init__.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack_integrations.components.preprocessors.presidio.presidio_document_cleaner import PresidioDocumentCleaner +from haystack_integrations.components.preprocessors.presidio.presidio_entity_extractor import PresidioEntityExtractor +from haystack_integrations.components.preprocessors.presidio.presidio_text_cleaner import PresidioTextCleaner + +__all__ = ["PresidioDocumentCleaner", "PresidioEntityExtractor", "PresidioTextCleaner"] diff --git 
a/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_document_cleaner.py b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_document_cleaner.py new file mode 100644 index 0000000000..348593f440 --- /dev/null +++ b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_document_cleaner.py @@ -0,0 +1,112 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack import Document, component, logging +from presidio_analyzer import AnalyzerEngine +from presidio_anonymizer import AnonymizerEngine + +logger = logging.getLogger(__name__) + + +@component +class PresidioDocumentCleaner: + """ + Anonymizes PII in Haystack Documents using [Microsoft Presidio](https://microsoft.github.io/presidio/). + + Accepts a list of Documents, detects personally identifiable information (PII) in their + text content, and returns new Documents with PII replaced by entity type placeholders + (e.g. `<PERSON>`, `<EMAIL_ADDRESS>`). Original Documents are not mutated. + + Documents without text content are passed through unchanged. + + Call `warm_up()` before running this component to load the Presidio analyzer and anonymizer engines. + + ### Usage example + + ```python + from haystack import Document + from haystack_integrations.components.preprocessors.presidio import PresidioDocumentCleaner + + cleaner = PresidioDocumentCleaner() + cleaner.warm_up() + result = cleaner.run(documents=[Document(content="My name is John and my email is john@example.com")]) + print(result["documents"][0].content) + # My name is <PERSON> and my email is <EMAIL_ADDRESS> + ``` + """ + + def __init__( + self, + *, + language: str = "en", + entities: list[str] | None = None, + score_threshold: float = 0.35, + ) -> None: + """ + Initializes the PresidioDocumentCleaner. + + :param language: + Language code for PII detection. Defaults to `"en"`. 
+ See [Presidio supported languages](https://microsoft.github.io/presidio/supported_languages/). + :param entities: + List of PII entity types to detect and anonymize (e.g. `["PERSON", "EMAIL_ADDRESS"]`). + If `None`, all supported entity types are used. + See [Presidio supported entities](https://microsoft.github.io/presidio/supported_entities/). + :param score_threshold: + Minimum confidence score (0-1) for a detected entity to be anonymized. Defaults to `0.35`. + See [Presidio analyzer documentation](https://microsoft.github.io/presidio/analyzer/). + """ + self.language = language + self.entities = entities + self.score_threshold = score_threshold + self._analyzer: AnalyzerEngine | None = None + self._anonymizer: AnonymizerEngine | None = None + + def warm_up(self) -> None: + """ + Initializes the Presidio analyzer and anonymizer engines. + + This method loads the underlying NLP models and should be called before `run()`. + In a Haystack Pipeline, this is called automatically before the first run. + """ + if self._analyzer is None: + self._analyzer = AnalyzerEngine() + if self._anonymizer is None: + self._anonymizer = AnonymizerEngine() + + @component.output_types(documents=list[Document]) + def run(self, documents: list[Document]) -> dict[str, list[Document]]: + """ + Anonymizes PII in the provided Documents. + + :param documents: + List of Documents whose text content will be anonymized. + :returns: + A dictionary with key `documents` containing the cleaned Documents. + """ + cleaned: list[Document] = [] + for doc in documents: + if doc.content is None: + cleaned.append(doc) + continue + if self._analyzer is None or self._anonymizer is None: + msg = "The component was not warmed up. Call warm_up() before running it." 
+ raise RuntimeError(msg) + try: + analyzer_results = self._analyzer.analyze( + text=doc.content, + language=self.language, + entities=self.entities, + score_threshold=self.score_threshold, + ) + anonymized = self._anonymizer.anonymize(text=doc.content, analyzer_results=analyzer_results) # type: ignore[arg-type] + cleaned.append(Document(content=anonymized.text, meta=doc.meta.copy())) + except Exception as e: + logger.warning( + "Could not anonymize document {doc_id}. Skipping it. Error: {error}", + doc_id=doc.id, + error=e, + ) + cleaned.append(doc) + return {"documents": cleaned} diff --git a/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_entity_extractor.py b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_entity_extractor.py new file mode 100644 index 0000000000..7b1b42b3d5 --- /dev/null +++ b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_entity_extractor.py @@ -0,0 +1,120 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import replace + +from haystack import Document, component, logging +from presidio_analyzer import AnalyzerEngine + +logger = logging.getLogger(__name__) + + +@component +class PresidioEntityExtractor: + """ + Detects PII entities in Haystack Documents using [Microsoft Presidio Analyzer](https://microsoft.github.io/presidio/). + + Accepts a list of Documents and returns new Documents with detected PII entities stored + in each Document's metadata under the key `"entities"`. Each entry in the list contains + the entity type, start/end character offsets, and the confidence score. + + Original Documents are not mutated. Documents without text content are passed through unchanged. + + Call `warm_up()` before running this component to load the Presidio analyzer engine. 
+ + ### Usage example + + ```python + from haystack import Document + from haystack_integrations.components.preprocessors.presidio import PresidioEntityExtractor + + extractor = PresidioEntityExtractor() + extractor.warm_up() + result = extractor.run(documents=[Document(content="Contact Alice at alice@example.com")]) + print(result["documents"][0].meta["entities"]) + # [{"entity_type": "PERSON", "start": 8, "end": 13, "score": 0.85}, + # {"entity_type": "EMAIL_ADDRESS", "start": 17, "end": 34, "score": 1.0}] + ``` + """ + + def __init__( + self, + *, + language: str = "en", + entities: list[str] | None = None, + score_threshold: float = 0.35, + ) -> None: + """ + Initializes the PresidioEntityExtractor. + + :param language: + Language code for PII detection. Defaults to `"en"`. + See [Presidio supported languages](https://microsoft.github.io/presidio/supported_languages/). + :param entities: + List of PII entity types to detect (e.g. `["PERSON", "EMAIL_ADDRESS"]`). + If `None`, all supported entity types are detected. + See [Presidio supported entities](https://microsoft.github.io/presidio/supported_entities/). + :param score_threshold: + Minimum confidence score (0-1) for a detected entity to be included. Defaults to `0.35`. + See [Presidio analyzer documentation](https://microsoft.github.io/presidio/analyzer/). + """ + self.language = language + self.entities = entities + self.score_threshold = score_threshold + self._analyzer: AnalyzerEngine | None = None + + def warm_up(self) -> None: + """ + Initializes the Presidio analyzer engine. + + This method loads the underlying NLP models and should be called before `run()`. + In a Haystack Pipeline, this is called automatically before the first run. + """ + if self._analyzer is None: + self._analyzer = AnalyzerEngine() + + @component.output_types(documents=list[Document]) + def run(self, documents: list[Document]) -> dict[str, list[Document]]: + """ + Detects PII entities in the provided Documents. 
+ + :param documents: + List of Documents to analyze for PII entities. + :returns: + A dictionary with key `documents` containing Documents with detected entities + stored in metadata under the key `"entities"`. + """ + result_docs: list[Document] = [] + for doc in documents: + if doc.content is None: + result_docs.append(doc) + continue + if self._analyzer is None: + msg = "The component was not warmed up. Call warm_up() before running it." + raise RuntimeError(msg) + try: + analyzer_results = self._analyzer.analyze( + text=doc.content, + language=self.language, + entities=self.entities, + score_threshold=self.score_threshold, + ) + entities = [ + { + "entity_type": r.entity_type, + "start": r.start, + "end": r.end, + "score": r.score, + } + for r in analyzer_results + ] + result_docs.append(replace(doc, meta={**doc.meta, "entities": entities})) + except Exception as e: + logger.warning( + "Could not extract entities from document {doc_id}. Skipping it. Error: {error}", + doc_id=doc.id, + error=e, + ) + result_docs.append(doc) + return {"documents": result_docs} diff --git a/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_text_cleaner.py b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_text_cleaner.py new file mode 100644 index 0000000000..d20f889c19 --- /dev/null +++ b/integrations/presidio/src/haystack_integrations/components/preprocessors/presidio/presidio_text_cleaner.py @@ -0,0 +1,105 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack import component, logging +from presidio_analyzer import AnalyzerEngine +from presidio_anonymizer import AnonymizerEngine + +logger = logging.getLogger(__name__) + + +@component +class PresidioTextCleaner: + """ + Anonymizes PII in plain strings using [Microsoft Presidio](https://microsoft.github.io/presidio/). 
+ + Accepts a list of strings, detects personally identifiable information (PII), and returns + a new list of strings with PII replaced by entity type placeholders (e.g. `<PERSON>`). + Useful for sanitizing user queries before they are sent to an LLM. + + Call `warm_up()` before running this component to load the Presidio analyzer and anonymizer engines. + + ### Usage example + + ```python + from haystack_integrations.components.preprocessors.presidio import PresidioTextCleaner + + cleaner = PresidioTextCleaner() + cleaner.warm_up() + result = cleaner.run(texts=["Hi, I am John Smith, call me at 212-555-1234"]) + print(result["texts"][0]) + # Hi, I am <PERSON>, call me at <PHONE_NUMBER> + ``` + """ + + def __init__( + self, + *, + language: str = "en", + entities: list[str] | None = None, + score_threshold: float = 0.35, + ) -> None: + """ + Initializes the PresidioTextCleaner. + + :param language: + Language code for PII detection. Defaults to `"en"`. + See [Presidio supported languages](https://microsoft.github.io/presidio/supported_languages/). + :param entities: + List of PII entity types to detect and anonymize (e.g. `["PERSON", "PHONE_NUMBER"]`). + If `None`, all supported entity types are used. + See [Presidio supported entities](https://microsoft.github.io/presidio/supported_entities/). + :param score_threshold: + Minimum confidence score (0-1) for a detected entity to be anonymized. Defaults to `0.35`. + See [Presidio analyzer documentation](https://microsoft.github.io/presidio/analyzer/). + """ + self.language = language + self.entities = entities + self.score_threshold = score_threshold + self._analyzer: AnalyzerEngine | None = None + self._anonymizer: AnonymizerEngine | None = None + + def warm_up(self) -> None: + """ + Initializes the Presidio analyzer and anonymizer engines. + + This method loads the underlying NLP models and should be called before `run()`. + In a Haystack Pipeline, this is called automatically before the first run. 
+ """ + if self._analyzer is None: + self._analyzer = AnalyzerEngine() + if self._anonymizer is None: + self._anonymizer = AnonymizerEngine() + + @component.output_types(texts=list[str]) + def run(self, texts: list[str]) -> dict[str, list[str]]: + """ + Anonymizes PII in the provided strings. + + :param texts: + List of strings to anonymize. + :returns: + A dictionary with key `texts` containing the cleaned strings. + """ + if self._analyzer is None or self._anonymizer is None: + msg = "The component was not warmed up. Call warm_up() before running it." + raise RuntimeError(msg) + cleaned: list[str] = [] + for text in texts: + try: + analyzer_results = self._analyzer.analyze( + text=text, + language=self.language, + entities=self.entities, + score_threshold=self.score_threshold, + ) + anonymized = self._anonymizer.anonymize(text=text, analyzer_results=analyzer_results) # type: ignore[arg-type] + cleaned.append(anonymized.text) + except Exception as e: + logger.warning( + "Could not anonymize text. Skipping it. 
Error: {error}", + error=e, + ) + cleaned.append(text) + return {"texts": cleaned} diff --git a/integrations/presidio/src/haystack_integrations/components/preprocessors/py.typed b/integrations/presidio/src/haystack_integrations/components/preprocessors/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integrations/presidio/tests/test_presidio_document_cleaner.py b/integrations/presidio/tests/test_presidio_document_cleaner.py new file mode 100644 index 0000000000..7c1d35107f --- /dev/null +++ b/integrations/presidio/tests/test_presidio_document_cleaner.py @@ -0,0 +1,157 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +from unittest.mock import MagicMock + +import pytest +from haystack import Document +from haystack.core.serialization import component_from_dict, component_to_dict + +from haystack_integrations.components.preprocessors.presidio import PresidioDocumentCleaner + + +class TestPresidioDocumentCleaner: + def test_init_defaults(self): + cleaner = PresidioDocumentCleaner() + assert cleaner.language == "en" + assert cleaner.entities is None + assert cleaner.score_threshold == 0.35 + + def test_init_custom_params(self): + cleaner = PresidioDocumentCleaner(language="de", entities=["PERSON"], score_threshold=0.7) + assert cleaner.language == "de" + assert cleaner.entities == ["PERSON"] + assert cleaner.score_threshold == 0.7 + + def test_to_dict(self): + cleaner = PresidioDocumentCleaner(language="en", entities=["EMAIL_ADDRESS"], score_threshold=0.5) + data = component_to_dict(cleaner, "PresidioDocumentCleaner") + expected_type = ( + "haystack_integrations.components.preprocessors.presidio.presidio_document_cleaner.PresidioDocumentCleaner" + ) + assert data["type"] == expected_type + assert data["init_parameters"]["language"] == "en" + assert data["init_parameters"]["entities"] == ["EMAIL_ADDRESS"] + assert data["init_parameters"]["score_threshold"] == 0.5 + + def 
test_from_dict(self): + data = { + "type": ( + "haystack_integrations.components.preprocessors.presidio" + ".presidio_document_cleaner.PresidioDocumentCleaner" + ), + "init_parameters": {"language": "de", "entities": ["PERSON"], "score_threshold": 0.6}, + } + cleaner = component_from_dict(PresidioDocumentCleaner, data, "PresidioDocumentCleaner") + assert cleaner.language == "de" + assert cleaner.entities == ["PERSON"] + assert cleaner.score_threshold == 0.6 + + def test_run_anonymizes_pii(self): + cleaner = PresidioDocumentCleaner() + mock_result = MagicMock() + mock_result.text = "My name is and email is " + cleaner._anonymizer = MagicMock() + cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + docs = [Document(content="My name is John and email is john@example.com")] + result = cleaner.run(documents=docs) + + assert len(result["documents"]) == 1 + assert result["documents"][0].content == "My name is and email is " + + def test_run_preserves_metadata(self): + cleaner = PresidioDocumentCleaner() + mock_result = MagicMock() + mock_result.text = "Hello " + cleaner._anonymizer = MagicMock() + cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + docs = [Document(content="Hello John", meta={"source": "email", "page": 1})] + result = cleaner.run(documents=docs) + + assert result["documents"][0].meta["source"] == "email" + assert result["documents"][0].meta["page"] == 1 + + def test_run_does_not_mutate_original(self): + cleaner = PresidioDocumentCleaner() + mock_result = MagicMock() + mock_result.text = "Hello " + cleaner._anonymizer = MagicMock() + cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + original = Document(content="Hello John") + cleaner.run(documents=[original]) + + assert original.content == 
"Hello John" + + def test_run_passes_through_none_content(self): + cleaner = PresidioDocumentCleaner() + doc = Document(content=None, meta={"source": "test"}) + result = cleaner.run(documents=[doc]) + + assert len(result["documents"]) == 1 + assert result["documents"][0].content is None + assert result["documents"][0].meta["source"] == "test" + + def test_run_skips_on_error(self, caplog): + cleaner = PresidioDocumentCleaner() + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.side_effect = Exception("Analyzer error") + cleaner._anonymizer = MagicMock() + + doc = Document(content="Some text with PII") + with caplog.at_level(logging.WARNING): + result = cleaner.run(documents=[doc]) + + assert len(result["documents"]) == 1 + assert result["documents"][0].content == "Some text with PII" + assert "Could not anonymize" in caplog.text + + def test_run_multiple_documents(self): + cleaner = PresidioDocumentCleaner() + mock_result = MagicMock() + mock_result.text = "cleaned" + cleaner._anonymizer = MagicMock() + cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + docs = [Document(content=f"doc {i}") for i in range(3)] + result = cleaner.run(documents=docs) + + assert len(result["documents"]) == 3 + + def test_run_passes_language_and_entities_to_analyzer(self): + cleaner = PresidioDocumentCleaner(language="de", entities=["PERSON"], score_threshold=0.8) + mock_result = MagicMock() + mock_result.text = "cleaned" + cleaner._anonymizer = MagicMock() + cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + cleaner.run(documents=[Document(content="Hello John")]) + + cleaner._analyzer.analyze.assert_called_once_with( + text="Hello John", language="de", entities=["PERSON"], score_threshold=0.8 + ) + + @pytest.mark.integration + def test_run_integration(self): + cleaner = PresidioDocumentCleaner() + 
cleaner.warm_up() + docs = [Document(content="My name is John Smith and my email is john@example.com")] + result = cleaner.run(documents=docs) + + assert len(result["documents"]) == 1 + assert "John Smith" not in result["documents"][0].content + assert "john@example.com" not in result["documents"][0].content diff --git a/integrations/presidio/tests/test_presidio_entity_extractor.py b/integrations/presidio/tests/test_presidio_entity_extractor.py new file mode 100644 index 0000000000..77d73a0250 --- /dev/null +++ b/integrations/presidio/tests/test_presidio_entity_extractor.py @@ -0,0 +1,125 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +from unittest.mock import MagicMock + +import pytest +from haystack import Document +from haystack.core.serialization import component_from_dict, component_to_dict + +from haystack_integrations.components.preprocessors.presidio import PresidioEntityExtractor + + +class TestPresidioEntityExtractor: + def test_init_defaults(self): + extractor = PresidioEntityExtractor() + assert extractor.language == "en" + assert extractor.entities is None + assert extractor.score_threshold == 0.35 + + def test_to_dict(self): + extractor = PresidioEntityExtractor(language="en", entities=["PERSON"], score_threshold=0.6) + data = component_to_dict(extractor, "PresidioEntityExtractor") + expected_type = ( + "haystack_integrations.components.preprocessors.presidio.presidio_entity_extractor.PresidioEntityExtractor" + ) + assert data["type"] == expected_type + assert data["init_parameters"]["entities"] == ["PERSON"] + assert data["init_parameters"]["score_threshold"] == 0.6 + + def test_from_dict(self): + data = { + "type": ( + "haystack_integrations.components.preprocessors.presidio" + ".presidio_entity_extractor.PresidioEntityExtractor" + ), + "init_parameters": {"language": "en", "entities": ["EMAIL_ADDRESS"], "score_threshold": 0.5}, + } + extractor = 
component_from_dict(PresidioEntityExtractor, data, "PresidioEntityExtractor") + assert extractor.entities == ["EMAIL_ADDRESS"] + + def test_run_extracts_entities_into_metadata(self): + extractor = PresidioEntityExtractor() + mock_entity = MagicMock() + mock_entity.entity_type = "PERSON" + mock_entity.start = 11 + mock_entity.end = 15 + mock_entity.score = 0.85 + extractor._analyzer = MagicMock() + extractor._analyzer.analyze.return_value = [mock_entity] + + docs = [Document(content="My name is John")] + result = extractor.run(documents=docs) + + entities = result["documents"][0].meta["entities"] + assert len(entities) == 1 + assert entities[0]["entity_type"] == "PERSON" + assert entities[0]["start"] == 11 + assert entities[0]["end"] == 15 + assert entities[0]["score"] == 0.85 + + def test_run_does_not_mutate_original(self): + extractor = PresidioEntityExtractor() + extractor._analyzer = MagicMock() + extractor._analyzer.analyze.return_value = [] + + original = Document(content="Hello John", meta={"source": "test"}) + extractor.run(documents=[original]) + + assert "entities" not in original.meta + + def test_run_passes_through_none_content(self): + extractor = PresidioEntityExtractor() + doc = Document(content=None, meta={"source": "test"}) + result = extractor.run(documents=[doc]) + + assert result["documents"][0].content is None + assert "entities" not in result["documents"][0].meta + + def test_run_empty_entities(self): + extractor = PresidioEntityExtractor() + extractor._analyzer = MagicMock() + extractor._analyzer.analyze.return_value = [] + + docs = [Document(content="No PII here")] + result = extractor.run(documents=docs) + + assert result["documents"][0].meta["entities"] == [] + + def test_run_skips_on_error(self, caplog): + extractor = PresidioEntityExtractor() + extractor._analyzer = MagicMock() + extractor._analyzer.analyze.side_effect = Exception("Analyzer error") + + doc = Document(content="Some text") + with caplog.at_level(logging.WARNING): + result = 
extractor.run(documents=[doc]) + + assert result["documents"][0].content == "Some text" + assert "entities" not in result["documents"][0].meta + assert "Could not extract entities" in caplog.text + + def test_run_preserves_existing_metadata(self): + extractor = PresidioEntityExtractor() + extractor._analyzer = MagicMock() + extractor._analyzer.analyze.return_value = [] + + docs = [Document(content="Hello", meta={"page": 3, "author": "Bob"})] + result = extractor.run(documents=docs) + + assert result["documents"][0].meta["page"] == 3 + assert result["documents"][0].meta["author"] == "Bob" + assert result["documents"][0].meta["entities"] == [] + + @pytest.mark.integration + def test_run_integration(self): + extractor = PresidioEntityExtractor() + extractor.warm_up() + docs = [Document(content="Contact Alice at alice@example.com")] + result = extractor.run(documents=docs) + + entities = result["documents"][0].meta["entities"] + entity_types = [e["entity_type"] for e in entities] + assert "EMAIL_ADDRESS" in entity_types diff --git a/integrations/presidio/tests/test_presidio_text_cleaner.py b/integrations/presidio/tests/test_presidio_text_cleaner.py new file mode 100644 index 0000000000..030ac5057b --- /dev/null +++ b/integrations/presidio/tests/test_presidio_text_cleaner.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +from unittest.mock import MagicMock + +import pytest +from haystack.core.serialization import component_from_dict, component_to_dict + +from haystack_integrations.components.preprocessors.presidio import PresidioTextCleaner + + +class TestPresidioTextCleaner: + def test_init_defaults(self): + cleaner = PresidioTextCleaner() + assert cleaner.language == "en" + assert cleaner.entities is None + assert cleaner.score_threshold == 0.35 + + def test_to_dict(self): + cleaner = PresidioTextCleaner(language="en", entities=["PHONE_NUMBER"], score_threshold=0.5) + data = 
component_to_dict(cleaner, "PresidioTextCleaner") + assert ( + data["type"] + == "haystack_integrations.components.preprocessors.presidio.presidio_text_cleaner.PresidioTextCleaner" + ) + assert data["init_parameters"]["entities"] == ["PHONE_NUMBER"] + + def test_from_dict(self): + data = { + "type": "haystack_integrations.components.preprocessors.presidio.presidio_text_cleaner.PresidioTextCleaner", + "init_parameters": {"language": "en", "entities": None, "score_threshold": 0.4}, + } + cleaner = component_from_dict(PresidioTextCleaner, data, "PresidioTextCleaner") + assert cleaner.score_threshold == 0.4 + + def test_run_anonymizes_pii(self): + cleaner = PresidioTextCleaner() + mock_result = MagicMock() + mock_result.text = "Call me at " + cleaner._anonymizer = MagicMock() + cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + result = cleaner.run(texts=["Call me at 212-555-1234"]) + + assert result["texts"][0] == "Call me at " + + def test_run_multiple_texts(self): + cleaner = PresidioTextCleaner() + mock_result = MagicMock() + mock_result.text = "cleaned" + cleaner._anonymizer = MagicMock() + cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + result = cleaner.run(texts=["text 1", "text 2", "text 3"]) + + assert len(result["texts"]) == 3 + + def test_run_skips_on_error(self, caplog): + cleaner = PresidioTextCleaner() + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.side_effect = Exception("error") + cleaner._anonymizer = MagicMock() + + with caplog.at_level(logging.WARNING): + result = cleaner.run(texts=["My name is John"]) + + assert result["texts"][0] == "My name is John" + assert "Could not anonymize" in caplog.text + + def test_run_empty_text(self): + cleaner = PresidioTextCleaner() + mock_result = MagicMock() + mock_result.text = "" + cleaner._anonymizer = MagicMock() + 
cleaner._anonymizer.anonymize.return_value = mock_result + cleaner._analyzer = MagicMock() + cleaner._analyzer.analyze.return_value = [] + + result = cleaner.run(texts=[""]) + + assert result["texts"][0] == "" + + @pytest.mark.integration + def test_run_integration(self): + cleaner = PresidioTextCleaner() + cleaner.warm_up() + result = cleaner.run(texts=["Hi, I am Alice and my phone is 212-555-5678"]) + + assert len(result["texts"]) == 1 + assert "Alice" not in result["texts"][0] + assert "212-555-5678" not in result["texts"][0]