diff --git a/docs-website/docs/pipeline-components/classifiers/documentlanguageclassifier.mdx b/docs-website/docs/pipeline-components/classifiers/documentlanguageclassifier.mdx index fd45d70906..6cfffcdfe7 100644 --- a/docs-website/docs/pipeline-components/classifiers/documentlanguageclassifier.mdx +++ b/docs-website/docs/pipeline-components/classifiers/documentlanguageclassifier.mdx @@ -16,9 +16,9 @@ Use this component to classify documents by language and add language informatio | **Most common position in a pipeline** | Before [`MetadataRouter`](../routers/metadatarouter.mdx) | | **Mandatory run variables** | `documents`: A list of documents | | **Output variables** | `documents`: A list of documents | -| **API reference** | [Classifiers](/reference/classifiers-api) | -| **GitHub link** | https://github.com/deepset-ai/haystack/blob/main/haystack/components/classifiers/document_language_classifier.py | -| **Package name** | `haystack-ai` | +| **API reference** | [Langdetect](/reference/integrations-langdetect) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langdetect | +| **Package name** | `langdetect-haystack` | @@ -34,10 +34,10 @@ For classifying and then routing plain text using the same logic, use the `TextL ## Usage -Install the `langdetect`package to use the `DocumentLanguageClassifier`component: +Install the `langdetect-haystack` package to use the `DocumentLanguageClassifier` component: -```shell shell -pip install langdetect +```shell +pip install langdetect-haystack ``` ### On its own @@ -45,7 +45,9 @@ pip install langdetect Below, we are using the `DocumentLanguageClassifier` to classify English and German documents: ```python -from haystack.components.classifiers import DocumentLanguageClassifier +from haystack_integrations.components.classifiers.langdetect import ( + DocumentLanguageClassifier, +) from haystack import Document documents = [ @@ -69,7 +71,9 @@ Below, we are using the 
`DocumentLanguageClassifier` in an indexing pipeline tha from haystack import Pipeline from haystack import Document from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.components.classifiers import DocumentLanguageClassifier +from haystack_integrations.components.classifiers.langdetect import ( + DocumentLanguageClassifier, +) from haystack.components.embedders import SentenceTransformersDocumentEmbedder from haystack.components.writers import DocumentWriter from haystack.components.routers import MetadataRouter diff --git a/docs-website/docs/pipeline-components/routers/textlanguagerouter.mdx b/docs-website/docs/pipeline-components/routers/textlanguagerouter.mdx index 0ba4d53540..a0ee5fc3a9 100644 --- a/docs-website/docs/pipeline-components/routers/textlanguagerouter.mdx +++ b/docs-website/docs/pipeline-components/routers/textlanguagerouter.mdx @@ -17,9 +17,9 @@ Use this component in pipelines to route a query based on its language. | **Mandatory init variables** | `languages`: A list of ISO language codes | | **Mandatory run variables** | `text`: A string | | **Output variables** | `unmatched`: A string

`<language>`: A string (where `<language>` is defined during initialization). For example: `fr`: French language string. | -| **API reference** | [Routers](/reference/routers-api) | -| **GitHub link** | https://github.com/deepset-ai/haystack/blob/main/haystack/components/routers/text_language_router.py | -| **Package name** | `haystack-ai` | +| **API reference** | [Langdetect](/reference/integrations-langdetect) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langdetect | +| **Package name** | `langdetect-haystack` | @@ -33,12 +33,18 @@ The components parameter `languages` must be a list of languages in ISO code, su ## Usage +Install the `langdetect-haystack` package to use the `TextLanguageRouter` component: + +```shell +pip install langdetect-haystack +``` + ### On its own Below is an example where using the `TextLanguageRouter` to route only French texts to an output connection named `fr`. Other texts, such as the English text below, are routed to an output named `unmatched`. 
```python -from haystack.components.routers import TextLanguageRouter +from haystack_integrations.components.routers.langdetect import TextLanguageRouter router = TextLanguageRouter(languages=["fr"]) router.run(text="What's your query?") @@ -50,7 +56,7 @@ Below is an example of a query pipeline that uses a `TextLanguageRouter` to forw ```python from haystack import Pipeline -from haystack.components.routers import TextLanguageRouter +from haystack_integrations.components.routers.langdetect import TextLanguageRouter from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.components.retrievers.in_memory import InMemoryBM25Retriever diff --git a/docs-website/versioned_docs/version-2.30/pipeline-components/classifiers/documentlanguageclassifier.mdx b/docs-website/versioned_docs/version-2.30/pipeline-components/classifiers/documentlanguageclassifier.mdx index fd45d70906..6cfffcdfe7 100644 --- a/docs-website/versioned_docs/version-2.30/pipeline-components/classifiers/documentlanguageclassifier.mdx +++ b/docs-website/versioned_docs/version-2.30/pipeline-components/classifiers/documentlanguageclassifier.mdx @@ -16,9 +16,9 @@ Use this component to classify documents by language and add language informatio | **Most common position in a pipeline** | Before [`MetadataRouter`](../routers/metadatarouter.mdx) | | **Mandatory run variables** | `documents`: A list of documents | | **Output variables** | `documents`: A list of documents | -| **API reference** | [Classifiers](/reference/classifiers-api) | -| **GitHub link** | https://github.com/deepset-ai/haystack/blob/main/haystack/components/classifiers/document_language_classifier.py | -| **Package name** | `haystack-ai` | +| **API reference** | [Langdetect](/reference/integrations-langdetect) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langdetect | +| **Package name** | `langdetect-haystack` | @@ -34,10 +34,10 @@ For classifying and then routing 
plain text using the same logic, use the `TextL ## Usage -Install the `langdetect`package to use the `DocumentLanguageClassifier`component: +Install the `langdetect-haystack` package to use the `DocumentLanguageClassifier` component: -```shell shell -pip install langdetect +```shell +pip install langdetect-haystack ``` ### On its own @@ -45,7 +45,9 @@ pip install langdetect Below, we are using the `DocumentLanguageClassifier` to classify English and German documents: ```python -from haystack.components.classifiers import DocumentLanguageClassifier +from haystack_integrations.components.classifiers.langdetect import ( + DocumentLanguageClassifier, +) from haystack import Document documents = [ @@ -69,7 +71,9 @@ Below, we are using the `DocumentLanguageClassifier` in an indexing pipeline tha from haystack import Pipeline from haystack import Document from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.components.classifiers import DocumentLanguageClassifier +from haystack_integrations.components.classifiers.langdetect import ( + DocumentLanguageClassifier, +) from haystack.components.embedders import SentenceTransformersDocumentEmbedder from haystack.components.writers import DocumentWriter from haystack.components.routers import MetadataRouter diff --git a/docs-website/versioned_docs/version-2.30/pipeline-components/routers/textlanguagerouter.mdx b/docs-website/versioned_docs/version-2.30/pipeline-components/routers/textlanguagerouter.mdx index 0ba4d53540..a0ee5fc3a9 100644 --- a/docs-website/versioned_docs/version-2.30/pipeline-components/routers/textlanguagerouter.mdx +++ b/docs-website/versioned_docs/version-2.30/pipeline-components/routers/textlanguagerouter.mdx @@ -17,9 +17,9 @@ Use this component in pipelines to route a query based on its language. | **Mandatory init variables** | `languages`: A list of ISO language codes | | **Mandatory run variables** | `text`: A string | | **Output variables** | `unmatched`: A string

`<language>`: A string (where `<language>` is defined during initialization). For example: `fr`: French language string. | -| **API reference** | [Routers](/reference/routers-api) | -| **GitHub link** | https://github.com/deepset-ai/haystack/blob/main/haystack/components/routers/text_language_router.py | -| **Package name** | `haystack-ai` | +| **API reference** | [Langdetect](/reference/integrations-langdetect) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langdetect | +| **Package name** | `langdetect-haystack` | @@ -33,12 +33,18 @@ The components parameter `languages` must be a list of languages in ISO code, su ## Usage +Install the `langdetect-haystack` package to use the `TextLanguageRouter` component: + +```shell +pip install langdetect-haystack +``` + ### On its own Below is an example where using the `TextLanguageRouter` to route only French texts to an output connection named `fr`. Other texts, such as the English text below, are routed to an output named `unmatched`. 
```python -from haystack.components.routers import TextLanguageRouter +from haystack_integrations.components.routers.langdetect import TextLanguageRouter router = TextLanguageRouter(languages=["fr"]) router.run(text="What's your query?") @@ -50,7 +56,7 @@ Below is an example of a query pipeline that uses a `TextLanguageRouter` to forw ```python from haystack import Pipeline -from haystack.components.routers import TextLanguageRouter +from haystack_integrations.components.routers.langdetect import TextLanguageRouter from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack.components.retrievers.in_memory import InMemoryBM25Retriever diff --git a/haystack/components/classifiers/document_language_classifier.py b/haystack/components/classifiers/document_language_classifier.py index 64d4703d8a..d285a0da47 100644 --- a/haystack/components/classifiers/document_language_classifier.py +++ b/haystack/components/classifiers/document_language_classifier.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import warnings from dataclasses import replace from haystack import Document, component, logging @@ -68,6 +69,14 @@ def __init__(self, languages: list[str] | None = None) -> None: See the supported languages in [`langdetect` documentation](https://github.com/Mimino666/langdetect#languages). If not specified, defaults to ["en"]. """ + warnings.warn( + "`DocumentLanguageClassifier` will be removed from Haystack in version 3.0, as it is moving to " + "the `langdetect-haystack` package. 
To continue using it, install that package with " + "`pip install langdetect-haystack` and update your import to " + "`from haystack_integrations.components.classifiers.langdetect import DocumentLanguageClassifier`.", + FutureWarning, + stacklevel=2, + ) langdetect_import.check() if not languages: languages = ["en"] diff --git a/haystack/components/routers/text_language_router.py b/haystack/components/routers/text_language_router.py index d971e8c2d8..08dfd38e6b 100644 --- a/haystack/components/routers/text_language_router.py +++ b/haystack/components/routers/text_language_router.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 +import warnings + from haystack import component, logging from haystack.lazy_imports import LazyImport @@ -53,6 +55,14 @@ def __init__(self, languages: list[str] | None = None) -> None: See the supported languages in [`langdetect` documentation](https://github.com/Mimino666/langdetect#languages). If not specified, defaults to ["en"]. """ + warnings.warn( + "`TextLanguageRouter` will be removed from Haystack in version 3.0, as it is moving to " + "the `langdetect-haystack` package. To continue using it, install that package with " + "`pip install langdetect-haystack` and update your import to " + "`from haystack_integrations.components.routers.langdetect import TextLanguageRouter`.", + FutureWarning, + stacklevel=2, + ) langdetect_import.check() if not languages: languages = ["en"] diff --git a/releasenotes/notes/deprecate-langdetect-components-7ff32c5b6d139a39.yaml b/releasenotes/notes/deprecate-langdetect-components-7ff32c5b6d139a39.yaml new file mode 100644 index 0000000000..97130ae733 --- /dev/null +++ b/releasenotes/notes/deprecate-langdetect-components-7ff32c5b6d139a39.yaml @@ -0,0 +1,11 @@ +--- +deprecations: + - | + ``DocumentLanguageClassifier`` and ``TextLanguageRouter`` are deprecated and will be removed from + Haystack in version 3.0. They are moving to the ``langdetect-haystack`` package. 
To continue using them, + install the package with ``pip install langdetect-haystack`` and update your imports as follows: + + .. code-block:: python + + from haystack_integrations.components.classifiers.langdetect import DocumentLanguageClassifier + from haystack_integrations.components.routers.langdetect import TextLanguageRouter