diff --git a/.github/labeler.yml b/.github/labeler.yml index 985e7cc1f7..ff7e8f133c 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -39,6 +39,11 @@ integration:fastembed: - any-glob-to-any-file: "integrations/fastembed/**/*" - any-glob-to-any-file: ".github/workflows/fastembed.yml" +integration:github: + - changed-files: + - any-glob-to-any-file: "integrations/github/**/*" + - any-glob-to-any-file: ".github/workflows/github.yml" + integration:google-ai: - changed-files: - any-glob-to-any-file: "integrations/google_ai/**/*" diff --git a/.github/workflows/github.yml b/.github/workflows/github.yml new file mode 100644 index 0000000000..21c9ba44a1 --- /dev/null +++ b/.github/workflows/github.yml @@ -0,0 +1,82 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / github + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/github/**" + - "!integrations/github/*.md" + - ".github/workflows/github.yml" + +defaults: + run: + working-directory: integrations/github + +concurrency: + group: github-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ["3.9", "3.13"] + + steps: + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . 
+ run: git config --system core.longpaths true + + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install --upgrade hatch + + - name: Lint + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run lint:all + + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + + - name: Run tests + run: hatch run cov-retry + + - name: Run unit tests with lowest direct dependencies + run: | + hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt + hatch run uv pip install -r requirements_lowest_direct.txt + hatch run test -m "not integration" + + - name: Nightly - run unit tests with Haystack main branch + if: github.event_name == 'schedule' + run: | + hatch env prune + hatch run uv pip install git+https://github.com/deepset-ai/haystack.git@main + hatch run cov-retry -m "not integration" + + - name: Send event to Datadog for nightly failures + if: failure() && github.event_name == 'schedule' + uses: ./.github/actions/send_failure + with: + title: | + Core integrations nightly tests failure: ${{ github.workflow }} + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} diff --git a/README.md b/README.md index e930803a7c..4a774885c2 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [deepeval-haystack](integrations/deepeval/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/deepeval-haystack.svg)](https://pypi.org/project/deepeval-haystack) | [![Test / deepeval](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml) | | [elasticsearch-haystack](integrations/elasticsearch/) 
| Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack) | [![Test / elasticsearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml) | | [fastembed-haystack](integrations/fastembed/) | Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/fastembed-haystack.svg)](https://pypi.org/project/fastembed-haystack/) | [![Test / fastembed](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml) | +| [github-haystack](integrations/github/) | Connector | [![PyPI - Version](https://img.shields.io/pypi/v/github-haystack.svg)](https://pypi.org/project/github-haystack) | [![Test / github](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/github.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/github.yml) | | [google-ai-haystack](integrations/google_ai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-ai-haystack.svg)](https://pypi.org/project/google-ai-haystack) | [![Test / google-ai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml) | | [google-vertex-haystack](integrations/google_vertex/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack) | [![Test / google-vertex](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml) | | 
[instructor-embedders-haystack](integrations/instructor_embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [![Test / instructor-embedders](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml) | diff --git a/integrations/github/LICENSE.txt b/integrations/github/LICENSE.txt new file mode 100644 index 0000000000..137069b823 --- /dev/null +++ b/integrations/github/LICENSE.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
+ +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 
+ +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/integrations/github/README.md b/integrations/github/README.md new file mode 100644 index 0000000000..a816d2c6d0 --- /dev/null +++ b/integrations/github/README.md @@ -0,0 +1,21 @@ +# github-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/github-haystack.svg)](https://pypi.org/project/github-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/github-haystack.svg)](https://pypi.org/project/github-haystack) + +----- + +## Table of Contents + +- [Installation](#installation) +- [License](#license) + +## Installation + +```console +pip install github-haystack +``` + +## License + +`github-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
diff --git a/integrations/github/pydoc/config.yml b/integrations/github/pydoc/config.yml new file mode 100644 index 0000000000..040be3b93f --- /dev/null +++ b/integrations/github/pydoc/config.yml @@ -0,0 +1,34 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.connectors.github.file_editor", + "haystack_integrations.components.connectors.github.issue_commenter", + "haystack_integrations.components.connectors.github.issue_viewer", + "haystack_integrations.components.connectors.github.pr_creator", + "haystack_integrations.components.connectors.github.repo_viewer", + "haystack_integrations.components.connectors.github.repo_forker", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer + excerpt: GitHub integration for Haystack + category_slug: integrations-api + title: GitHub + slug: integrations-github + order: 100 + markdown: + descriptive_class_title: false + classdef_code_block: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_github.md \ No newline at end of file diff --git a/integrations/github/pyproject.toml b/integrations/github/pyproject.toml new file mode 100644 index 0000000000..48ea0034db --- /dev/null +++ b/integrations/github/pyproject.toml @@ -0,0 +1,176 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "github-haystack" +dynamic = ["version"] +description = 'Haystack components for interacting with GitHub repositories' +readme = "README.md" +requires-python = ">=3.9" +license = "Apache-2.0" +keywords = [] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] +classifiers = [ + "License :: 
OSI Approved :: Apache Software License", + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = ["haystack-ai>=2.12.0"] + +[project.urls] +Source = "https://github.com/deepset-ai/haystack-core-integrations/github" +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/github/README.md" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" + +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + +[tool.hatch.version] +source = "vcs" +tag-pattern = 'integrations\/github-v(?P<version>.*)' + +[tool.hatch.version.raw-options] +root = "../.." 
+git_describe_command = 'git describe --tags --match="integrations/github-v[0-9]*"' + +[tool.hatch.envs.default] +installer = "uv" +dependencies = [ + "coverage[toml]>=6.5", + "pytest", + "pytest-rerunfailures", + "haystack-pydoc-tools", + "pytz", + ] + +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" +cov-report = ["- coverage combine", "coverage report"] +cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] +docs = ["pydoc-markdown pydoc/config.yml"] +fix = "ruff check --fix" + +[tool.hatch.envs.lint] +installer = "uv" +detached = true +dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"] +[tool.hatch.envs.lint.scripts] +typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" +style = [ + "ruff check {args:}", + "black --check --diff {args:.}", +] +fmt = ["black {args:.}", "ruff check --fix {args:}", "style"] +all = ["style", "typing"] + +[tool.black] +target-version = ["py38"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +target-version = "py38" +line-length = 120 + +[tool.ruff.lint] +select = [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", + # Misc + "B008", + "S101", +] +unfixable = [ + # Don't touch unused imports + "F401", +] + +[tool.ruff.lint.isort] +known-first-party = ["haystack_integrations"] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, 
and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] +# Ignore RUF001 for all files in the prompts directory +"src/haystack_integrations/components/prompts/**/*" = ["RUF001"] + +[tool.coverage.run] +source = ["haystack_integrations"] +branch = true +parallel = false + + +[tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing = true +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] + + +[[tool.mypy.overrides]] +module = [ + "github.*", + "haystack.*", + "haystack_integrations.*", + "openai.*", + "pytest.*", + "numpy.*", +] +ignore_missing_imports = true + +[tool.pytest.ini_options] +addopts = "--strict-markers" +markers = [ + "integration: integration tests", + "unit: unit tests", +] +log_cli = true diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py new file mode 100644 index 0000000000..ea4a4969b4 --- /dev/null +++ b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .file_editor import Command, GitHubFileEditor +from .issue_commenter import GitHubIssueCommenter +from .issue_viewer import GitHubIssueViewer +from .pr_creator import GitHubPRCreator +from .repo_forker import GitHubRepoForker +from .repo_viewer import GitHubRepoViewer + +__all__ = [ + "Command", + "GitHubFileEditor", + "GitHubIssueCommenter", + "GitHubIssueViewer", + "GitHubPRCreator", + "GitHubRepoForker", + "GitHubRepoViewer", +] diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py new file mode 100644 index 0000000000..2f543b7b72 --- /dev/null +++ 
b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -0,0 +1,303 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from base64 import b64decode, b64encode +from enum import Enum +from typing import Any, Dict, Optional, Union + +import requests +from haystack import component, default_from_dict, default_to_dict, logging +from haystack.utils import Secret, deserialize_secrets_inplace + +logger = logging.getLogger(__name__) + + +class Command(str, Enum): + """ + Available commands for file operations in GitHub. + + Attributes: + EDIT: Edit an existing file by replacing content + UNDO: Revert the last commit if made by the same user + CREATE: Create a new file + DELETE: Delete an existing file + """ + + EDIT = "edit" + UNDO = "undo" + CREATE = "create" + DELETE = "delete" + + +@component +class GitHubFileEditor: + """ + A Haystack component for editing files in GitHub repositories. + + Supports editing, undoing changes, deleting files, and creating new files + through the GitHub API. 
+ + ### Usage example + ```python + from haystack_integrations.components.connectors.github import Command, GitHubFileEditor + from haystack.utils import Secret + + # Initialize with default repo and branch + editor = GitHubFileEditor( + github_token=Secret.from_env_var("GITHUB_TOKEN"), + repo="owner/repo", + branch="main" + ) + + # Edit a file using default repo and branch + result = editor.run( + command=Command.EDIT, + payload={ + "path": "path/to/file.py", + "original": "def old_function():", + "replacement": "def new_function():", + "message": "Renamed function for clarity" + } + ) + + # Edit a file in a different repo/branch + result = editor.run( + command=Command.EDIT, + repo="other-owner/other-repo", # Override default repo + branch="feature", # Override default branch + payload={ + "path": "path/to/file.py", + "original": "def old_function():", + "replacement": "def new_function():", + "message": "Renamed function for clarity" + } + ) + ``` + """ + + def __init__( + self, + *, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + repo: Optional[str] = None, + branch: str = "main", + raise_on_failure: bool = True, + ): + """ + Initialize the component. + + :param github_token: GitHub personal access token for API authentication + :param repo: Default repository in owner/repo format + :param branch: Default branch to work with + :param raise_on_failure: If True, raises exceptions on API errors + + :raises TypeError: If github_token is not a Secret + """ + if not isinstance(github_token, Secret): + error_message = "github_token must be a Secret" + raise TypeError(error_message) + + self.github_token = github_token + self.default_repo = repo + self.default_branch = branch + self.raise_on_failure = raise_on_failure + + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubFileEditor", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. 
+ + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]: + """Get file content and SHA from GitHub.""" + url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" + response = requests.get(url, headers=self._get_request_headers(), params={"ref": branch}, timeout=10) + response.raise_for_status() + data = response.json() + content = b64decode(data["content"]).decode("utf-8") + return content, data["sha"] + + def _update_file(self, owner: str, repo: str, path: str, content: str, message: str, sha: str, branch: str) -> bool: + """Update file content on GitHub.""" + url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" + payload = { + "message": message, + "content": b64encode(content.encode("utf-8")).decode("utf-8"), + "sha": sha, + "branch": branch, + } + response = requests.put(url, headers=self._get_request_headers(), json=payload, timeout=10) + response.raise_for_status() + return True + + def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool: + """Check if last commit was made by the current token user.""" + url = f"https://api.github.com/repos/{owner}/{repo}/commits" + params: Dict[str, Union[str, int]] = {"per_page": 1, "sha": branch} + response = requests.get(url, headers=self._get_request_headers(), params=params, timeout=10) + response.raise_for_status() + last_commit = response.json()[0] + commit_author = last_commit["author"]["login"] + + # Get current user + user_response = requests.get("https://api.github.com/user", headers=self._get_request_headers(), timeout=10) + user_response.raise_for_status() + current_user = user_response.json()["login"] + + return commit_author == current_user + + def _edit_file(self, owner: str, 
repo: str, payload: Dict[str, str], branch: str) -> str: + """Handle file editing.""" + try: + content, sha = self._get_file_content(owner, repo, payload["path"], branch) + + # Check if original string is unique + occurrences = content.count(payload["original"]) + if occurrences == 0: + return "Error: Original string not found in file" + if occurrences > 1: + return "Error: Original string appears multiple times. Please provide more context" + + # Perform the replacement + new_content = content.replace(payload["original"], payload["replacement"]) + success = self._update_file(owner, repo, payload["path"], new_content, payload["message"], sha, branch) + return "Edit successful" if success else "Edit failed" + + except requests.RequestException as e: + if self.raise_on_failure: + raise + return f"Error: {e!s}" + + def _undo_changes(self, owner: str, repo: str, payload: Dict[str, Any], branch: str) -> str: + """Handle undoing changes.""" + try: + if not self._check_last_commit(owner, repo, branch): + return "Error: Last commit was not made by the current user" + + # Reset to previous commit + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch}" + commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits" + + # Get the previous commit SHA + params: Dict[str, Union[str, int]] = {"per_page": 2, "sha": branch} + commits = requests.get(commits_url, headers=self._get_request_headers(), params=params, timeout=10).json() + previous_sha = commits[1]["sha"] + + # Update branch reference to previous commit + payload = {"sha": previous_sha, "force": True} + response = requests.patch(url, headers=self._get_request_headers(), json=payload, timeout=10) + response.raise_for_status() + + return "Successfully undid last change" + + except requests.RequestException as e: + if self.raise_on_failure: + raise + return f"Error: {e!s}" + + def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: + """Handle file 
@component.output_types(result=str)
def run(
    self,
    command: Union[Command, str],
    payload: Dict[str, Any],
    repo: Optional[str] = None,
    branch: Optional[str] = None,
) -> Dict[str, str]:
    """
    Process GitHub file operations.

    :param command: Operation to perform ("edit", "undo", "create", "delete")
    :param payload: Dictionary containing command-specific parameters
    :param repo: Repository in owner/repo format (overrides default if provided)
    :param branch: Branch to perform operations on (overrides default if provided)
    :return: Dictionary containing operation result

    :raises ValueError: If command is not a valid Command enum value, or if the
        repository is not in owner/repo format
    """
    if repo is None:
        if self.default_repo is None:
            return {
                "result": "Error: No repository specified. Either provide it in initialization or in run() method"
            }
        repo = self.default_repo

    working_branch = branch if branch is not None else self.default_branch

    # Validate explicitly so a malformed repository yields a readable error
    # instead of an opaque tuple-unpacking failure
    repo_parts = repo.split("/")
    if len(repo_parts) != 2:  # noqa: PLR2004
        msg = f"Invalid repository '{repo}': expected owner/repo format"
        raise ValueError(msg)
    owner, repo_name = repo_parts

    # Convert string command to Command enum if needed
    if isinstance(command, str):
        command = Command(command.lower())

    command_handlers = {
        Command.EDIT: self._edit_file,
        Command.UNDO: self._undo_changes,
        Command.CREATE: self._create_file,
        Command.DELETE: self._delete_file,
    }

    if command not in command_handlers:
        return {"result": f"Error: Unknown command '{command}'"}

    result = command_handlers[command](owner, repo_name, payload, working_branch)
    return {"result": result}
+logger = logging.getLogger(__name__) + + +@component +class GitHubIssueCommenter: + """ + Posts comments to GitHub issues. + + The component takes a GitHub issue URL and comment text, then posts the comment + to the specified issue using the GitHub API. + + ### Usage example + ```python + from haystack_integrations.components.connectors.github import GitHubIssueCommenter + from haystack.utils import Secret + + commenter = GitHubIssueCommenter(github_token=Secret.from_env_var("GITHUB_TOKEN")) + result = commenter.run( + url="https://github.com/owner/repo/issues/123", + comment="Thanks for reporting this issue! We'll look into it." + ) + + print(result["success"]) + ``` + """ + + def __init__( + self, + *, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + retry_attempts: int = 2, + ): + """ + Initialize the component. + + :param github_token: GitHub personal access token for API authentication as a Secret + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + """ + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.retry_attempts = retry_attempts + + # Set base headers during initialization + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubIssueCommenter", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def _parse_github_url(self, url: str) -> tuple[str, str, int]: + """ + Parse GitHub URL into owner, repo and issue number. 
+ + :param url: GitHub issue URL + :return: Tuple of (owner, repo, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, url) + if not match: + msg = f"Invalid GitHub issue URL format: {url}" + raise ValueError(msg) + + owner, repo, issue_number = match.groups() + return owner, repo, int(issue_number) + + def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) -> bool: + """ + Post a comment to a GitHub issue. + + :param owner: Repository owner + :param repo: Repository name + :param issue_number: Issue number + :param comment: Comment text to post + :return: True if comment was posted successfully + :raises requests.exceptions.RequestException: If the API request fails + """ + url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/comments" + data = {"body": comment} + + for attempt in range(self.retry_attempts): + try: + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) + response.raise_for_status() + return True + except requests.exceptions.RequestException as e: + if attempt == self.retry_attempts - 1: + raise + logger.warning(f"Attempt {attempt + 1} failed: {e!s}. Retrying...") + + return False + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the component to a dictionary. + + :returns: Dictionary with serialized data. + """ + return default_to_dict( + self, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + retry_attempts=self.retry_attempts, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenter": + """ + Deserialize the component from a dictionary. + + :param data: Dictionary to deserialize from. + :returns: Deserialized component. 
@component.output_types(success=bool)
def run(self, url: str, comment: str) -> dict:
    """
    Parse the issue URL and post the given comment to that issue.

    :param url: GitHub issue URL
    :param comment: Comment text to post
    :return: Dictionary with a single ``success`` flag
    """
    try:
        posted = self._post_comment(*self._parse_github_url(url), comment)
    except (requests.exceptions.RequestException, ValueError) as err:
        if self.raise_on_failure:
            raise

        # Best-effort mode: log the failure and report it through the output flag
        logger.warning(f"Error posting comment to GitHub issue {url}: {err!s}")
        return {"success": False}

    return {"success": posted}
+ + The component takes a GitHub issue URL and returns a list of documents where: + - First document contains the main issue content + - Subsequent documents contain the issue comments + + ### Usage example + ```python + from haystack_integrations.components.connectors.github import GitHubIssueViewer + + viewer = GitHubIssueViewer() + docs = viewer.run( + url="https://github.com/owner/repo/issues/123" + )["documents"] + + print(docs) + ``` + """ + + def __init__( + self, + *, + github_token: Optional[Secret] = None, + raise_on_failure: bool = True, + retry_attempts: int = 2, + ): + """ + Initialize the component. + + :param github_token: GitHub personal access token for API authentication as a Secret + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + """ + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.retry_attempts = retry_attempts + + # Only set the basic headers during initialization + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubIssueViewer", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def _parse_github_url(self, url: str) -> tuple[str, str, int]: + """ + Parse GitHub URL into owner, repo and issue number. 
+ + :param url: GitHub issue URL + :return: Tuple of (owner, repo, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, url) + if not match: + msg = f"Invalid GitHub issue URL format: {url}" + raise ValueError(msg) + + owner, repo, issue_number = match.groups() + return owner, repo, int(issue_number) + + def _fetch_issue(self, owner: str, repo: str, issue_number: int) -> Any: + """ + Fetch issue data from GitHub API. + + :param owner: Repository owner + :param repo: Repository name + :param issue_number: Issue number + :return: Issue data dictionary + """ + url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}" + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + return response.json() + + def _fetch_comments(self, comments_url: str) -> Any: + """ + Fetch issue comments from GitHub API. + + :param comments_url: URL for issue comments + :return: List of comment dictionaries + """ + response = requests.get(comments_url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + return response.json() + + def _create_issue_document(self, issue_data: dict) -> Document: + """ + Create a Document from issue data. + + :param issue_data: Issue data from GitHub API + :return: Haystack Document + """ + return Document( # type: ignore + content=issue_data["body"], + meta={ + "type": "issue", + "title": issue_data["title"], + "number": issue_data["number"], + "state": issue_data["state"], + "created_at": issue_data["created_at"], + "updated_at": issue_data["updated_at"], + "author": issue_data["user"]["login"], + "url": issue_data["html_url"], + }, + ) + + def _create_comment_document(self, comment_data: dict, issue_number: int) -> Document: + """ + Create a Document from comment data. 
@component.output_types(documents=List[Document])
def run(self, url: str) -> dict:
    """
    Fetch a GitHub issue and its comments and convert them to documents.

    :param url: GitHub issue URL
    :return: Dictionary with a ``documents`` list: the issue first, followed by its
        comments; on failure with ``raise_on_failure`` disabled, a single error document
    """
    try:
        owner, repo, issue_number = self._parse_github_url(url)
        issue = self._fetch_issue(owner, repo, issue_number)

        # The issue itself always comes first
        docs = [self._create_issue_document(issue)]

        # Only hit the comments endpoint when the issue reports having comments
        if issue["comments"] > 0:
            for comment in self._fetch_comments(issue["comments_url"]):
                docs.append(self._create_comment_document(comment, issue_number))

        return {"documents": docs}

    except Exception as err:
        if self.raise_on_failure:
            raise

        error_message = f"Error processing GitHub issue {url}: {err!s}"
        logger.warning(error_message)
        # Surface the failure as a document so downstream components still receive output
        return {
            "documents": [
                Document(
                    content=error_message,
                    meta={
                        "error": True,
                        "type": "error",
                        "url": url,
                    },
                )
            ]
        }
+ + ### Usage example + ```python + from haystack_integrations.components.connectors.github import GitHubPRCreator + from haystack.utils import Secret + + pr_creator = GitHubPRCreator( + github_token=Secret.from_env_var("GITHUB_TOKEN") # Token from the fork owner + ) + + # Create a PR from your fork + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/123", + title="Fix issue #123", + body="This PR addresses issue #123", + branch="feature-branch", # The branch in your fork with the changes + base="main" # The branch in the original repo to merge into + ) + ``` + """ + + def __init__(self, *, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True): + """ + Initialize the component. + + :param github_token: GitHub personal access token for authentication (from the fork owner) + :param raise_on_failure: If True, raises exceptions on API errors + """ + if not isinstance(github_token, Secret): + msg = "github_token must be a Secret" + raise TypeError(msg) + + self.github_token = github_token + self.raise_on_failure = raise_on_failure + + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubPRCreator", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: + """ + Parse owner, repo name, and issue number from GitHub issue URL. 
+ + :param issue_url: Full GitHub issue URL + :return: Tuple of (owner, repo_name, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, issue_url) + if not match: + msg = "Invalid GitHub issue URL format" + raise ValueError(msg) + return match.group(1), match.group(2), match.group(3) + + def _get_authenticated_user(self) -> str: + """Get the username of the authenticated user (fork owner).""" + response = requests.get("https://api.github.com/user", headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + return response.json()["login"] + + def _check_fork_exists(self, repo: str, fork_owner: str) -> bool: + """Check if the fork exists.""" + url = f"https://api.github.com/repos/{fork_owner}/{repo}" + try: + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + fork_data = response.json() + return fork_data.get("fork", False) + except requests.RequestException: + return False + + def _create_fork(self, owner: str, repo: str) -> Optional[str]: + """Create a fork of the repository.""" + url = f"https://api.github.com/repos/{owner}/{repo}/forks" + try: + response = requests.post(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + fork_data = response.json() + return fork_data["owner"]["login"] + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create fork: {e!s}" + raise RuntimeError(msg) from e + return None + + def _create_branch(self, owner: str, repo: str, branch_name: str, base_branch: str) -> bool: + """Create a new branch in the repository.""" + # Get the SHA of the base branch + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{base_branch}" + try: + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + base_sha = 
response.json()["object"]["sha"] + + # Create the new branch + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs" + data = {"ref": f"refs/heads/{branch_name}", "sha": base_sha} + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) + response.raise_for_status() + return True + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create branch: {e!s}" + raise RuntimeError(msg) from e + return False + + def _create_commit( + self, + owner: str, + repo: str, + branch_name: str, + file_path: str, + content: str, + message: str, + ) -> bool: + """Create a commit with the file changes.""" + # Get the current commit SHA + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}" + try: + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + current_sha = response.json()["object"]["sha"] + + # Create a blob with the file content + url = f"https://api.github.com/repos/{owner}/{repo}/git/blobs" + data: dict[str, Any] = {"content": content, "encoding": "base64"} + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) + response.raise_for_status() + blob_sha = response.json()["sha"] + + # Create a tree with the new file + url = f"https://api.github.com/repos/{owner}/{repo}/git/trees" + data = { + "base_tree": current_sha, + "tree": [{"path": file_path, "mode": "100644", "type": "blob", "sha": blob_sha}], + } + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) + response.raise_for_status() + tree_sha = response.json()["sha"] + + # Create the commit + url = f"https://api.github.com/repos/{owner}/{repo}/git/commits" + data = {"message": message, "tree": tree_sha, "parents": [current_sha]} + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) + response.raise_for_status() + commit_sha = response.json()["sha"] 
+ + # Update the branch reference + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}" + data = {"sha": commit_sha} + response = requests.patch(url, headers=self._get_request_headers(), json=data, timeout=10) + response.raise_for_status() + return True + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create commit: {e!s}" + raise RuntimeError(msg) from e + return False + + def _create_pull_request( + self, + owner: str, + repo: str, + branch_name: str, + base_branch: str, + title: str, + body: str, + ) -> bool: + """Create a pull request.""" + url = f"https://api.github.com/repos/{owner}/{repo}/pulls" + data = {"title": title, "body": body, "head": branch_name, "base": base_branch} + try: + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) + response.raise_for_status() + return True + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create pull request: {e!s}" + raise RuntimeError(msg) from e + return False + + @component.output_types(result=str) + def run( + self, issue_url: str, title: str, branch: str, base: str, body: str = "", draft: bool = False + ) -> Dict[str, str]: + """ + Create a new pull request from your fork to the original repository, linked to the specified issue. 
def to_dict(self) -> Dict[str, Any]:
    """
    Serialize the component to a dictionary.

    :returns: Dictionary with the serialized token (or None) and the ``raise_on_failure`` flag
    """
    token = self.github_token
    # Serialize the token itself rather than storing the live object
    serialized_token = token.to_dict() if token else None
    return default_to_dict(
        self,
        github_token=serialized_token,
        raise_on_failure=self.raise_on_failure,
    )
deserialize_secrets_inplace(init_params, keys=["github_token"]) + return default_from_dict(cls, data) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py new file mode 100644 index 0000000000..dc5188b90e --- /dev/null +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py @@ -0,0 +1,306 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +import re +from typing import Any, Dict, Optional + +import requests +from haystack import component, default_from_dict, default_to_dict, logging +from haystack.utils import Secret, deserialize_secrets_inplace + +logger = logging.getLogger(__name__) + + +@component +class GitHubRepoForker: + """ + Forks a GitHub repository from an issue URL. + + The component takes a GitHub issue URL, extracts the repository information, + creates or syncs a fork of that repository, and optionally creates an issue-specific branch. + + ### Usage example + ```python + from haystack_integrations.components.connectors.github import GitHubRepoForker + from haystack.utils import Secret + + # Using direct token with auto-sync and branch creation + forker = GitHubRepoForker( + github_token=Secret.from_env_var("GITHUB_TOKEN"), + auto_sync=True, + create_branch=True + ) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + print(result) + # Will create or sync fork and create branch "fix-123" + ``` + """ + + def __init__( + self, + *, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + wait_for_completion: bool = False, + max_wait_seconds: int = 300, + poll_interval: int = 2, + auto_sync: bool = True, + create_branch: bool = True, + ): + """ + Initialize the component. 
+ + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param wait_for_completion: If True, waits until fork is fully created + :param max_wait_seconds: Maximum time to wait for fork completion in seconds + :param poll_interval: Time between status checks in seconds + :param auto_sync: If True, syncs fork with original repository if it already exists + :param create_branch: If True, creates a fix branch based on the issue number + """ + error_message = "github_token must be a Secret" + if not isinstance(github_token, Secret): + raise TypeError(error_message) + + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.wait_for_completion = wait_for_completion + self.max_wait_seconds = max_wait_seconds + self.poll_interval = poll_interval + self.auto_sync = auto_sync + self.create_branch = create_branch + + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubRepoForker", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def _parse_github_url(self, url: str) -> tuple[str, str, str]: + """ + Parse GitHub URL into owner, repo, and issue number. 
+ + :param url: GitHub issue URL + :return: Tuple of (owner, repo, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, url) + if not match: + error_message = f"Invalid GitHub issue URL format: {url}" + raise ValueError(error_message) + + owner, repo, issue_number = match.groups() + return owner, repo, issue_number + + def _check_fork_status(self, fork_path: str) -> bool: + """ + Check if a forked repository exists and is ready. + + :param fork_path: Repository path in owner/repo format + :return: True if fork exists and is ready, False otherwise + """ + url = f"https://api.github.com/repos/{fork_path}" + try: + response = requests.get( + url, + headers=self._get_request_headers(), + timeout=10, + ) + return response.status_code == 200 # noqa: PLR2004 + except requests.RequestException: + return False + + def _get_authenticated_user(self) -> str: + """ + Get the authenticated user's username. + + :return: Username of the authenticated user + :raises requests.RequestException: If API call fails + """ + url = "https://api.github.com/user" + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + return response.json()["login"] + + def _get_existing_repository(self, repo_name: str) -> Optional[str]: + """ + Check if a repository with the given name already exists in the authenticated user's account. 
+ + :param repo_name: Repository name to check + :return: Full repository name if it exists, None otherwise + """ + url = f"https://api.github.com/repos/{self._get_authenticated_user()}/{repo_name}" + try: + response = requests.get( + url, + headers=self._get_request_headers(), + timeout=10, + ) + if response.status_code == 200: # noqa: PLR2004 + return repo_name + return None + except requests.RequestException as e: + logger.warning(f"Failed to check repository existence: {e!s}") + return None + + def _sync_fork(self, fork_path: str) -> None: + """ + Sync a fork with its upstream repository. + + :param fork_path: Fork path in owner/repo format + :raises requests.RequestException: If sync fails + """ + url = f"https://api.github.com/repos/{fork_path}/merge-upstream" + response = requests.post( + url, + headers=self._get_request_headers(), + json={"branch": "main"}, + timeout=10, + ) + response.raise_for_status() + + def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: + """ + Create a new branch for the issue. 
+ + :param fork_path: Fork path in owner/repo format + :param issue_number: Issue number to use in branch name + :raises requests.RequestException: If branch creation fails + """ + # First, get the default branch SHA + url = f"https://api.github.com/repos/{fork_path}" + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + default_branch = response.json()["default_branch"] + + # Get the SHA of the default branch + url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + sha = response.json()["object"]["sha"] + + # Create the new branch + branch_name = f"fix-{issue_number}" + url = f"https://api.github.com/repos/{fork_path}/git/refs" + response = requests.post( + url, + headers=self._get_request_headers(), + json={"ref": f"refs/heads/{branch_name}", "sha": sha}, + timeout=10, + ) + response.raise_for_status() + + def _create_fork(self, owner: str, repo: str) -> str: + """ + Create a fork of the repository. + + :param owner: Original repository owner + :param repo: Repository name + :return: Fork path in owner/repo format + :raises requests.RequestException: If fork creation fails + """ + url = f"https://api.github.com/repos/{owner}/{repo}/forks" + response = requests.post(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + + fork_data = response.json() + return f"{fork_data['owner']['login']}/{fork_data['name']}" + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the component to a dictionary. + + :returns: Dictionary with serialized data. 
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForker":
    """
    Rebuild the component from its serialized form.

    :param data: Dictionary to deserialize from.
    :returns: Deserialized component.
    """
    # Restore the token in place before the generic deserializer reads the init parameters
    deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"])
    return default_from_dict(cls, data)
self.raise_on_failure: + raise TimeoutError(msg) + logger.warning(msg) + + # Create issue branch if enabled + issue_branch = None + if self.create_branch: + issue_branch = f"fix-{issue_number}" + logger.info(f"Creating branch for issue #{issue_number}") + self._create_issue_branch(fork_path, issue_number) + + return {"repo": fork_path, "issue_branch": issue_branch} + + except Exception as e: + if self.raise_on_failure: + raise + logger.warning("Error forking repository from {url}: {error}", url=url, error=str(e)) + return {"repo": "", "issue_branch": None} diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py new file mode 100644 index 0000000000..4ae4d02115 --- /dev/null +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -0,0 +1,272 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +import base64 +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +import requests +from haystack import Document, component, default_from_dict, default_to_dict, logging +from haystack.utils import Secret, deserialize_secrets_inplace + +logger = logging.getLogger(__name__) + + +@dataclass +class GitHubItem: + """Represents an item (file or directory) in a GitHub repository""" + + name: str + type: str # "file" or "dir" + path: str + size: int + url: str + content: Optional[str] = None + + +@component +class GitHubRepoViewer: + """ + Navigates and fetches content from GitHub repositories. 
+ + For directories: + - Returns a list of Documents, one for each item + - Each Document's content is the item name + - Full path and metadata in Document.meta + + For files: + - Returns a single Document + - Document's content is the file content + - Full path and metadata in Document.meta + + For errors: + - Returns a single Document + - Document's content is the error message + - Document's meta contains type="error" + + ### Usage example + ```python + from haystack_integrations.components.connectors.github import GitHubRepoViewer + + viewer = GitHubRepoViewer() + + # List directory contents - returns multiple documents + result = viewer.run( + repo="owner/repository", + path="docs/", + branch="main" + ) + print(result) + + # Get specific file - returns single document + result = viewer.run( + repo="owner/repository", + path="README.md", + branch="main" + ) + print(result) + ``` + """ + + def __init__( + self, + *, + github_token: Optional[Secret] = None, + raise_on_failure: bool = True, + max_file_size: int = 1_000_000, # 1MB default limit + repo: Optional[str] = None, + branch: str = "main", + ): + """ + Initialize the component. 
+ + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param max_file_size: Maximum file size in bytes to fetch (default: 1MB) + :param repo: Repository in format "owner/repo" + :param branch: Git reference (branch, tag, commit) to use + """ + if github_token is not None and not isinstance(github_token, Secret): + msg = "github_token must be a Secret" + raise TypeError(msg) + + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.max_file_size = max_file_size + self.repo = repo + self.branch = branch + + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubRepoViewer", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the component to a dictionary. + + :returns: Dictionary with serialized data. + """ + return default_to_dict( + self, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + max_file_size=self.max_file_size, + repo=self.repo, + branch=self.branch, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewer": + """ + Deserialize the component from a dictionary. + + :param data: Dictionary to deserialize from. + :returns: Deserialized component. 
+ """ + init_params = data["init_parameters"] + deserialize_secrets_inplace(init_params, keys=["github_token"]) + return default_from_dict(cls, data) + + def _parse_repo(self, repo: str) -> tuple[str, str]: + """Parse owner/repo string""" + parts = repo.split("/") + if len(parts) != 2: # noqa: PLR2004 + msg = f"Invalid repository format. Expected 'owner/repo', got '{repo}'" + raise ValueError(msg) + return parts[0], parts[1] + + def _normalize_path(self, path: str) -> str: + """Normalize repository path""" + return path.strip("/") + + def _fetch_contents(self, owner: str, repo: str, path: str, ref: str) -> Any: + """Fetch repository contents from GitHub API""" + url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" + if ref: + url += f"?ref={ref}" + + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + return response.json() + + def _process_file_content(self, content: str, encoding: str) -> str: + """Process file content based on encoding""" + if encoding == "base64": + return base64.b64decode(content).decode("utf-8") + return content + + def _create_file_document(self, item: GitHubItem) -> Document: + """Create a Document from a file""" + return Document( + content=item.content if item.content else item.name, + meta={ + "path": item.path, + "type": "file_content", + "size": item.size, + "url": item.url, + }, + ) + + def _create_directory_documents(self, items: List[GitHubItem]) -> List[Document]: + """Create a list of Documents from directory contents""" + return [ + Document( + content=item.name, + meta={ + "path": item.path, + "type": item.type, + "size": item.size, + "url": item.url, + }, + ) + for item in sorted(items, key=lambda x: (x.type != "dir", x.name.lower())) + ] + + def _create_error_document(self, error: Exception, path: str) -> Document: + """Create a Document from an error""" + return Document( + content=str(error), + meta={ + "type": "error", + "path": path, + }, + ) + + 
@component.output_types(documents=List[Document]) + def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = None) -> Dict[str, List[Document]]: + """ + Process a GitHub repository path and return documents. + + :param repo: Repository in format "owner/repo" + :param path: Path within repository (default: root) + :param branch: Git reference (branch, tag, commit) to use + :return: Dictionary containing list of documents + """ + if repo is None: + repo = self.repo + if repo is None: + msg = "Repository not provided in initialization or run() method" + raise ValueError(msg) + if branch is None: + branch = self.branch + + try: + owner, repo_name = self._parse_repo(repo) + normalized_path = self._normalize_path(path) + + contents = self._fetch_contents(owner, repo_name, normalized_path, branch) + + # Handle single file response + if not isinstance(contents, list): + if contents.get("size", 0) > self.max_file_size: + error_message = f"File size {contents['size']} exceeds limit of {self.max_file_size}" + raise ValueError(error_message) + + item = GitHubItem( + name=contents["name"], + type="file", + path=contents["path"], + size=contents["size"], + url=contents["html_url"], + content=self._process_file_content(contents["content"], contents["encoding"]), + ) + return {"documents": [self._create_file_document(item)]} + + # Handle directory listing + items = [ + GitHubItem( + name=item["name"], + type="dir" if item["type"] == "dir" else "file", + path=item["path"], + size=item.get("size", 0), + url=item["html_url"], + ) + for item in contents + ] + + return {"documents": self._create_directory_documents(items)} + + except Exception as e: + error_doc = self._create_error_document( + Exception(f"Error processing repository path {path}: {e!s}. 
Seems like the file does not exist."), path + ) + if self.raise_on_failure: + raise + logger.warning( + "Error processing repository path {path}: {error}", + path=path, + error=str(e), + ) + return {"documents": [error_doc]} diff --git a/integrations/github/src/haystack_integrations/prompts/github/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py new file mode 100644 index 0000000000..b13f31c239 --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/__init__.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .context_prompt import CONTEXT_PROMPT +from .file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from .issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA +from .pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA +from .repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA +from .system_prompt import SYSTEM_PROMPT + +__all__ = [ + "CONTEXT_PROMPT", + "FILE_EDITOR_PROMPT", + "FILE_EDITOR_SCHEMA", + "ISSUE_COMMENTER_PROMPT", + "ISSUE_COMMENTER_SCHEMA", + "PR_CREATOR_PROMPT", + "PR_CREATOR_SCHEMA", + "REPO_VIEWER_PROMPT", + "REPO_VIEWER_SCHEMA", + "SYSTEM_PROMPT", +] diff --git a/integrations/github/src/haystack_integrations/prompts/github/context_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/context_prompt.py new file mode 100644 index 0000000000..a920a0875b --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/context_prompt.py @@ -0,0 +1,176 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +CONTEXT_PROMPT = """Haystack-Agent was specifically designed to help developers with the Haystack-framework + and any Haystack related questions. +The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. 
+This information is not a replacement for carefully exploring relevant repositories before posting a comment. + +**Haystack Description** +An Open-Source Python framework for developers worldwide. +AI orchestration framework to build customizable, production-ready LLM applications. +Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. +With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or +conversational agent chatbots. + +**High-Level Architecture** +Haystack has two central abstractions: +- Components +- Pipelines + +A Component is a lightweight abstraction that gets inputs, performs an action and returns outputs. +Some example components: +- `OpenAIGenerator`: receives a prompt and generates replies to the prompt by calling an OpenAI-model +- `MetadataRouter`: routes documents to configurable outputs based on their metadata +- `BM25Retriever`: retrieves documents from a 'DocumentStore' based on the 'query'-input + +A component is lightweight. It is easy to implement custom components. Here is some information from the docs: + +Requirements + +Here are the requirements for all custom components: + +- `@component`: This decorator marks a class as a component, allowing it to be used in a pipeline. +- `run()`: This is a required method in every component. It accepts input arguments and returns a `dict`. The inputs can +either come from the pipeline when it's executed, or from the output of another component when connected using +`connect()`. The `run()` method should be compatible with the input/output definitions declared for the component. +See an [Extended Example](#extended-example) below to check how it works. + +## Inputs and Outputs + +Next, define the inputs and outputs for your component. 
+ +### Inputs + +You can choose between three input options: + +- `set_input_type`: This method defines or updates a single input socket for a component instance. It's ideal for adding +or modifying a specific input at runtime without affecting others. Use this when you need to dynamically set or modify +a single input based on specific conditions. +- `set_input_types`: This method allows you to define multiple input sockets at once, replacing any existing inputs. +It's useful when you know all the inputs the component will need and want to configure them in bulk. Use this when you +want to define multiple inputs during initialization. +- Declaring arguments directly in the `run()` method. Use this method when the component's inputs are static and known +at the time of class definition. + +### Outputs + +You can choose between two output options: + +- `@component.output_types`: This decorator defines the output types and names at the time of class definition. The +output names and types must match the `dict` returned by the `run()` method. Use this when the output types are static +and known in advance. This decorator is cleaner and more readable for static components. +- `set_output_types`: This method defines or updates multiple output sockets for a component instance at runtime. +It's useful when you need flexibility in configuring outputs dynamically. Use this when the output types need to be set +at runtime for greater flexibility. 
+ +# Short Example + +Here is an example of a simple minimal component setup: + +```python +from haystack import component + +@component +class WelcomeTextGenerator: + ''' + A component generating personal welcome message and making it upper case + ''' + @component.output_types(welcome_text=str, note=str) + def run(self, name:str): + return {"welcome_text": f'Hello {name}, welcome to Haystack!'.upper(), "note": "welcome message is ready"} + +``` + +Here, the custom component `WelcomeTextGenerator` accepts one input: `name` string and returns two outputs: +`welcome_text` and `note`. + + +---------- + +**Pipelines** +The pipelines in Haystack 2.0 are directed multigraphs of different Haystack components and integrations. +They give you the freedom to connect these components in various ways. This means that the +pipeline doesn't need to be a continuous stream of information. With the flexibility of Haystack pipelines, +you can have simultaneous flows, standalone components, loops, and other types of connections. + +# Steps to Create a Pipeline Explained + +Once all your components are created and ready to be combined in a pipeline, there are four steps to make it work: + +1. Create the pipeline with `Pipeline()`. + This creates the Pipeline object. +2. Add components to the pipeline, one by one, with `.add_component(name, component)`. + This just adds components to the pipeline without connecting them yet. It's especially useful for loops as it allows + the smooth connection of the components in the next step because they all already exist in the pipeline. +3. Connect components with `.connect("producer_component.output_name", "consumer_component.input_name")`. + At this step, you explicitly connect one of the outputs of a component to one of the inputs of the next component. + This is also when the pipeline validates the connection without running the components. It makes the validation fast. +4. 
Run the pipeline with `.run({"component_1": {"mandatory_inputs": value}})`. + Finally, you run the Pipeline by specifying the first component in the pipeline and passing its mandatory inputs. + + Optionally, you can pass inputs to other components, for example: + `.run({"component_1": {"mandatory_inputs": value}, "component_2": {"inputs": value}})`. + +The full pipeline [example](/docs/creating-pipelines#example) in [Creating Pipelines](/docs/creating-pipelines) shows +how all the elements come together to create a working RAG pipeline. + +Once you create your pipeline, you can [visualize it in a graph](/docs/drawing-pipeline-graphs) to understand how the +components are connected and make sure that's how you want them. You can use Mermaid graphs to do that. + +# Validation + +Validation happens when you connect pipeline components with `.connect()`, but before running the components to make it +faster. The pipeline validates that: + +- The components exist in the pipeline. +- The components' outputs and inputs match and are explicitly indicated. For example, if a component produces two +outputs, when connecting it to another component, you must indicate which output connects to which input. +- The components' types match. +- For input types other than `Variadic`, checks if the input is already occupied by another connection. + +All of these checks produce detailed errors to help you quickly fix any issues identified. + +# Serialization + +Thanks to serialization, you can save and then load your pipelines. Serialization is converting a Haystack pipeline +into a format you can store on disk or send over the wire. It's particularly useful for: + +- Editing, storing, and sharing pipelines. +- Modifying existing pipelines in a format different than Python. + +Haystack pipelines delegate the serialization to its components, so serializing a pipeline simply means serializing +each component in the pipeline one after the other, along with their connections. 
The pipeline is serialized into a +dictionary format, which acts as an intermediate format that you can then convert into the final format you want. + +> 📘 Serialization formats +> +> Haystack 2.0 only supports YAML format at this time. We'll be rolling out more formats gradually. + +For serialization to be possible, components must support conversion from and to Python dictionaries. All Haystack +components have two methods that make them serializable: `from_dict` and `to_dict`. The `Pipeline` class, in turn, has +its own `from_dict` and `to_dict` methods that take care of serializing components and connections. + + +--------- + +**Haystack Repositories** + +1. "deepset-ai/haystack" + +Contains the core code for the Haystack framework and a few components. +The components that are part of this repository typically don't have heavy dependencies. + + +2. "deepset-ai/haystack-core-integrations" + +This is a mono-repo maintained by the deepset-Team that contains integrations for the Haystack framework. +Typically, an integration consists of one or more components. Some integrations only contain document stores. +Each integration is a standalone pypi-package but you can find all of them in the core integrations repo. + + +3. "deepset-ai/haystack-experimental" + +Contains experimental features for the Haystack framework. + +""" diff --git a/integrations/github/src/haystack_integrations/prompts/github/file_editor_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/file_editor_prompt.py new file mode 100644 index 0000000000..b87e903167 --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/file_editor_prompt.py @@ -0,0 +1,122 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +FILE_EDITOR_PROMPT = """Use the file editor to edit an existing file in the repository. 
+ +You must provide a 'command' for the action that you want to perform: +- edit +- create +- delete +- undo + +The 'payload' contains your options for each command. + +**Command 'edit'** + +To edit a file, you need to provide: +1. The path to the file +2. The original code snippet from the file +3. Your replacement code +4. A commit message + +The code will only be replaced if it is unique in the file. Pass a minimum of 2 consecutive lines that should +be replaced. If the original is not unique, the editor will return an error. +Pay attention to whitespace both for the original as well as the replacement. + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +Example: +{ + "command": "edit", + "payload": { + "path": "README.md", + "original": "This is a placeholder description!\\nIt should be updated.", + "replacement": "This project helps developers test AI applications.", + "message": "docs: README should mention project purpose." + } +} + + +**Command 'create'** + +To create a file, you need to provide: +1. The path for the new file +2. The content for the file +3. A commit message + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +IMPORTANT: +You MUST ALWAYS provide 'content' when creating a new file. File creation with empty content does not work. + +Example: +{ + "command": "create", + "payload": { + "path": "CONTRIBUTING.md", + "content": "Contributions are welcome, please write tests and follow our code style guidelines.", + "message": "chore: minimal instructions for contributors" + } +} + + +**Command 'delete'** + +To delete a file, you need to provide: +1. The path to the file to delete +2. A commit message + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. 
+ +Example: +{ + "command": "delete", + "payload": { + "path": "tests/components/test_messaging", + "message": "chore: messaging feature was removed" + } +} + +**Command 'undo'** + +This is how to undo your latest change. + +Important notes: +- You can only undo your own changes +- You can only undo one change at a time +- You need to provide a message for the undo operation + +Example: +{ + "command": "undo", + "payload": { + "message": "revert: undo previous commit due to failing tests" + } +} +""" + +FILE_EDITOR_SCHEMA = { + "type": "object", + "properties": { + "command": { + "type": "string", + "enum": ["edit", "create", "delete", "undo"], + "description": "The command to execute", + }, + "payload": { + "type": "object", + "required": ["message"], + "properties": { + "message": {"type": "string"}, + "content": {"type": "string"}, + "path": {"type": "string"}, + "original": {"type": "string"}, + "replacement": {"type": "string"}, + }, + }, + }, + "required": ["command", "payload"], +} diff --git a/integrations/github/src/haystack_integrations/prompts/github/issue_commenter_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/issue_commenter_prompt.py new file mode 100644 index 0000000000..f35411293e --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/issue_commenter_prompt.py @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +ISSUE_COMMENTER_PROMPT = """Haystack-Agent uses this tool to post a comment to a GitHub-issue discussion. + + +Pass a `comment` string to post a comment. + + +IMPORTANT +Haystack-Agent MUST pass "comment" to this tool. Otherwise, comment creation fails. +Haystack-Agent always passes the contents of the comment to the "comment" parameter when calling this tool. 
+""" + +ISSUE_COMMENTER_SCHEMA = { + "properties": { + "comment": {"type": "string", "description": "The contents of the comment that you want to create."} + }, + "required": ["comment"], + "type": "object", +} diff --git a/integrations/github/src/haystack_integrations/prompts/github/issue_viewer_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/issue_viewer_prompt.py new file mode 100644 index 0000000000..af01903c74 --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/issue_viewer_prompt.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +ISSUE_VIEWER_PROMPT = """Haystack-Agent uses this tool to view a GitHub issue. +Haystack-Agent can view one issue at a time. + + +Pass an `issue_url` string for the GitHub issue that you want to view. +It is required to pass `issue_url` to use this tool. +The structure is "https://github.com/repo-owner/repo-name/issues/issue-number". + +Examples: + +- {"issue_url": "https://github.com/deepset-ai/haystack/issues/9343"} + - will show you the issue 9343 of the haystack repository +- {"issue_url": "https://github.com/deepset-ai/haystack-core-integrations/issues/1685"} + - will show you the issue 1685 of the haystack-core-integrations repository + +""" + +ISSUE_VIEWER_SCHEMA = { + "properties": {"issue_url": {"type": "string", "description": "URL of the GitHub issue to view."}}, + "required": ["issue_url"], + "type": "object", +} diff --git a/integrations/github/src/haystack_integrations/prompts/github/pr_creator_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_creator_prompt.py new file mode 100644 index 0000000000..d8df98e691 --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/pr_creator_prompt.py @@ -0,0 +1,83 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +PR_CREATOR_PROMPT = """The assistant is 
Haystack-Agent, created by deepset. +Haystack-Agent creates Pull Requests that resolve GitHub issues. + +Haystack-Agent receives a GitHub issue and all current comments. +Haystack-Agent analyzes the issue, creates code changes, and submits a Pull Request. + +**Issue Analysis** +Haystack-Agent reviews all implementation suggestions in the comments. +Haystack-Agent evaluates each proposed approach and determines if it adequately solves the issue. +Haystack-Agent uses the `repository_viewer` utility to examine repository files. +Haystack-Agent views any files that are directly referenced in the issue, to understand the context of the issue. +Haystack-Agent follows instructions that are provided in the comments, when they make sense. + +**Software Engineering** +Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. +Haystack-Agent finds the right level of abstraction and complexity. +When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and +documentation patterns that are already being used in the codebase. +Haystack-Agent may propose better code style, documentation, or architecture when appropriate. +Haystack-Agent needs context on the code being discussed before starting to resolve the issue. +Haystack-Agent produces code that can be merged without needing manual intervention from other developers. +Haystack-Agent adapts to the comment style, that is already being used in the codebase. +It avoids superfluous comments that point out the obvious. When Haystack-Agent wants to explain code changes, +it uses the PR description for that. + +**Thinking Process** +Haystack-Agent thinks thoroughly about each issue. +Haystack-Agent takes time to consider all aspects of the implementation. +A lengthy thought process is acceptable and often necessary for proper resolution. 
+ + +Haystack-Agent notes down any thoughts and observations in the scratchpad, so that it can reference them later. + + +**Resolution Process** +Haystack-Agent follows these steps to resolve issues: + +1. Analyze the issue and comments, noting all proposed implementations +2. Explore the repository from the root (/) directory +3. Examine files referenced in the issue or comments +4. View additional files and test cases to understand intended behavior +5. Create initial test cases to validate the planned solution +6. Edit repository source code to resolve the issue +7. Update test cases to match code changes +8. Handle edge cases and ensure code matches repository style +9. Create a Pull Request using the `create_pr` utility + +**Pull Request Creation** +Haystack-Agent writes clear Pull Request descriptions. +Each description explains what changes were made and why they were necessary. +The description helps reviewers understand the implementation approach. +""" + +PR_CREATOR_SCHEMA = { + "properties": { + "issue_url": {"type": "string", "description": "URL of the GitHub issue to link the PR to."}, + "title": { + "type": "string", + "description": "Title of the pull request.", + }, + "branch": { + "type": "string", + "description": "Name of the branch in your fork where changes are implemented.", + }, + "base": { + "type": "string", + "description": "Name of the branch in the original repo you want to merge into.", + }, + "body": { + "type": "string", + "description": "Additional content for the pull request description.", + }, + "draft": { + "type": "boolean", + "description": "Whether to create a draft pull request.", + }, + }, + "required": ["issue_url", "title", "branch", "base"], + "type": "object", +} diff --git a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_prompt.py new file mode 100644 index 0000000000..081534f515 --- /dev/null +++ 
b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_prompt.py @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +REPO_VIEWER_PROMPT = """Haystack-Agent uses this tool to browse GitHub repositories. +Haystack-Agent can view directories and files with this tool. + + +Pass a `repo` string for the repository that you want to view. +It is required to pass `repo` to use this tool. +The structure is "owner/repo-name". + +Pass a `path` string for the directory or file that you want to view. +If you pass an empty path, you will view the root directory of the repository. + +Examples: + +- {"repo": "pandas-dev/pandas", "path": ""} + - will show you the root of the pandas repository +- {"repo": "pandas-dev/pandas", "path": "pyproject.toml"} + - will show you the "pyproject.toml"-file of the pandas repository +- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert"} + - will show you the "albert"-directory in the transformers repository +- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert/modeling_albert.py"} + - will show you the source code for the albert model in the transformers repository + + +Haystack-Agent uses the `github_repository_viewer` to view relevant code. +Haystack-Agent starts at the root of the repository. +Haystack-Agent navigates one level at a time using directory listings. +Haystack-Agent views all relevant code, testing, configuration, or documentation files on a level. +It never skips a directory level or guesses full paths. + +Haystack-Agent thinks deeply about the content of a repository. Before Haystack-Agent uses the tool, it reasons about +next steps: + + +- What am I looking for in this location? +- Why is this path potentially relevant? +- What specific files might help solve the issue? +- What patterns or implementations should I look for?
+ + +After viewing the contents of a file or directory, Haystack-Agent reflects on its observations before moving on: + +- What did I learn from these files? +- What else might be related? +- Where should I look next and why? + + +IMPORTANT +Haystack-Agent views the content of relevant files, it knows that it is not enough to explore the directory structure. +Haystack-Agent needs to read the code to understand it properly. +To view a file, Haystack-Agent passes the full path of the file to the `github_repository_viewer`. +Haystack-Agent never guesses a file or directory path. + +Haystack-Agent takes notes after viewing code: + +- extract important code snippets +- document key functions, classes or configurations +- note key architecture patterns +- relate findings to the original issue +- relate findings to other code that was already viewed +- note down file paths as a reference + +""" + +REPO_VIEWER_SCHEMA = { + "properties": { + "repo": {"type": "string", "description": "The owner/repository_name that you want to view."}, + "path": { + "type": "string", + "description": "Path to directory or file to view. Defaults to repository root.", + }, + "branch": { + "type": "string", + "description": "Branch to view. Defaults to 'main'.", + }, + }, + "required": ["repo"], + "type": "object", +} diff --git a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py new file mode 100644 index 0000000000..cd48741508 --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py @@ -0,0 +1,63 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +SYSTEM_PROMPT = """The assistant is Haystack-Agent, created by deepset. +Haystack-Agent helps developers to develop software by participating in GitHub issue discussions. + +Haystack-Agent receives a GitHub issue and all current comments. 
+Haystack-Agent participates in the discussion by: +- helping users find answers to their questions +- analyzing bug reports and proposing a fix when necessary +- analyzing feature requests and proposing an implementation +- being a sounding board in architecture discussions and proposing alternative solutions + +**Style** +Haystack-Agent uses Markdown formatting. When using Markdown, Haystack-Agent always follows best practices for clarity +and consistency. +It always uses a single space after hash symbols for headers (e.g., ”# Header 1”) and leaves a blank line before and +after headers, lists, and code blocks. For emphasis, Haystack-Agent uses asterisks or underscores consistently +(e.g., italic or bold). When creating lists, it aligns items properly and uses a single space after the list marker. +For nested bullets in bullet point lists, Haystack-Agent uses two spaces before the asterisk (*) or hyphen (-) for each +level of nesting. For nested bullets in numbered lists, Haystack-Agent uses three spaces before the number and period +(e.g., “1.”) for each level of nesting. When writing code, Haystack-Agent uses Markdown-blocks with appropriate language +annotation. + +**Software Engineering** +Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. +Haystack-Agent finds the right level of abstraction and complexity. +When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and +documentation patterns that are already being used in the codebase. +Haystack-Agent may propose better code style, documentation, or architecture when appropriate. +Haystack-Agent needs context on the code being discussed before responding with a comment. +Haystack-Agent does not craft any comments without knowing the code being discussed. +Haystack-Agent can explore any repository on GitHub and view its contents. 
+ +**Exploring Repositories** +Haystack-Agent uses the `repository_viewer` to explore GitHub repositories before crafting a comment. +Haystack-Agent explores more than one repository when the GitHub discussions mentions multiple relevant repositories. + +**Thinking** +Haystack-Agent is a rigorous thinker. It uses -blocks to gather thoughts, reflect on the issue at +hand, and relate its learnings to it. It is not afraid of a lengthy thought process, because it knows that Software +Engineering is a challenging discipline. +Haystack-Agent takes notes on the . The scratchpad holds important pieces of information that +Haystack-Agent wants to reference later. + +**Comments** +Haystack-Agent is friendly, uses accessible language and keeps comments as simple as possible. +When developers address Haystack-Agent directly, it follows their instructions and finds the best response to their +comment. Haystack-Agent is happy to revise its code when a developer asks for it. +Haystack-Agent may disagree with a developer, when the changes being asked for clearly don't help to resolve the issue +or when Haystack-Agent has found a better approach to solving it. +Haystack-Agent uses the `create_comment`-tool to create a comment. Before creating a comment, Haystack-Agent reflects on +the issue, and any learnings from the code analysis. Haystack-Agent only responds when ready. + + +Haystack-Agent, this is IMPORTANT: +- DO NOT START WRITING YOUR RESPONSE UNTIL YOU HAVE COMPLETED THE ENTIRE EXPLORATION PHASE +- VIEWING DIRECTORY LISTINGS IS NOT ENOUGH - YOU MUST EXAMINE FILE CONTENTS +- If you find yourself running out of context space during exploration, say: "I need to continue exploring the codebase +before providing a complete response." Then continue exploration in the next interaction. + +Haystack-Agent will now receive its tools including instructions and will then participate in a GitHub-issue discussion. 
+""" diff --git a/integrations/github/src/haystack_integrations/tools/github/__init__.py b/integrations/github/src/haystack_integrations/tools/github/__init__.py new file mode 100644 index 0000000000..fabeb6acd6 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/__init__.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .file_editor_tool import GitHubFileEditorTool +from .issue_commenter_tool import GitHubIssueCommenterTool +from .issue_viewer_tool import GitHubIssueViewerTool +from .pr_creator_tool import GitHubPRCreatorTool +from .repo_viewer_tool import GitHubRepoViewerTool + +__all__ = [ + "GitHubFileEditorTool", + "GitHubIssueCommenterTool", + "GitHubIssueViewerTool", + "GitHubPRCreatorTool", + "GitHubRepoViewerTool", +] diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py new file mode 100644 index 0000000000..ac1a215231 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -0,0 +1,128 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Callable, Dict, Optional, Union + +from haystack.core.serialization import generate_qualified_class_name +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.file_editor import GitHubFileEditor +from haystack_integrations.prompts.github.file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers + + +class GitHubFileEditorTool(ComponentTool): + """ + A tool for editing files in GitHub repositories. 
+ """ + + def __init__( + self, + *, + name: Optional[str] = "file_editor", + description: Optional[str] = FILE_EDITOR_PROMPT, + parameters: Optional[Dict[str, Any]] = FILE_EDITOR_SCHEMA, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + repo: Optional[str] = None, + branch: str = "main", + raise_on_failure: bool = True, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + ): + """ + Initialize the GitHub file editor tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param repo: Default repository in owner/repo format + :param branch: Default branch to work with + :param raise_on_failure: If True, raises exceptions on API errors + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } + """ + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.repo = repo + self.branch = branch + self.raise_on_failure = raise_on_failure + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state + + file_editor = GitHubFileEditor( + github_token=github_token, + repo=repo, + branch=branch, + raise_on_failure=raise_on_failure, + ) + super().__init__( + component=file_editor, + name=name, + description=description, + parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. + """ + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "repo": self.repo, + "branch": self.branch, + "raise_on_failure": self.raise_on_failure, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return {"type": generate_qualified_class_name(type(self)), "data": serialized} + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. 
+ """ + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py new file mode 100644 index 0000000000..43d37740f2 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py @@ -0,0 +1,123 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Callable, Dict, Optional, Union + +from haystack.core.serialization import generate_qualified_class_name +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.issue_commenter import GitHubIssueCommenter +from haystack_integrations.prompts.github.issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers + + +class GitHubIssueCommenterTool(ComponentTool): + """ + A tool for commenting on GitHub issues. + """ + + def __init__( + self, + *, + name: Optional[str] = "issue_commenter", + description: Optional[str] = ISSUE_COMMENTER_PROMPT, + parameters: Optional[Dict[str, Any]] = ISSUE_COMMENTER_SCHEMA, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + retry_attempts: int = 2, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + ): + """ + Initialize the GitHub issue commenter tool. + + :param name: Optional name for the tool. + :param description: Optional description. 
+ :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } + """ + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.retry_attempts = retry_attempts + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state + + issue_commenter = GitHubIssueCommenter( + github_token=github_token, + raise_on_failure=raise_on_failure, + retry_attempts=retry_attempts, + ) + super().__init__( + component=issue_commenter, + name=name, + description=description, + parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. + """ + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "raise_on_failure": self.raise_on_failure, + "retry_attempts": self.retry_attempts, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return {"type": generate_qualified_class_name(type(self)), "data": serialized} + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenterTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. 
+ """ + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py new file mode 100644 index 0000000000..48dc28f0c3 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py @@ -0,0 +1,123 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Callable, Dict, Optional, Union + +from haystack.core.serialization import generate_qualified_class_name +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.issue_viewer import GitHubIssueViewer +from haystack_integrations.prompts.github.issue_viewer_prompt import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers + + +class GitHubIssueViewerTool(ComponentTool): + """ + A tool for viewing GitHub issues. + """ + + def __init__( + self, + *, + name: Optional[str] = "issue_viewer", + description: Optional[str] = ISSUE_VIEWER_PROMPT, + parameters: Optional[Dict[str, Any]] = ISSUE_VIEWER_SCHEMA, + github_token: Optional[Secret] = None, + raise_on_failure: bool = True, + retry_attempts: int = 2, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + ): + """ + Initialize the GitHub issue viewer tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. 
+ :param github_token: Optional GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. + Example: { + "documents": {"handler": custom_handler} + } + """ + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.retry_attempts = retry_attempts + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state + + issue_viewer = GitHubIssueViewer( + github_token=github_token, + raise_on_failure=raise_on_failure, + retry_attempts=retry_attempts, + ) + super().__init__( + component=issue_viewer, + name=name, + description=description, + parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. 
+ + :returns: + Dictionary with serialized data. + """ + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "raise_on_failure": self.raise_on_failure, + "retry_attempts": self.retry_attempts, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return {"type": generate_qualified_class_name(type(self)), "data": serialized} + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewerTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. + """ + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py new file mode 100644 index 0000000000..9d480658ab --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -0,0 +1,117 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Callable, Dict, Optional, Union + +from haystack.core.serialization import generate_qualified_class_name +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.pr_creator import GitHubPRCreator +from haystack_integrations.prompts.github.pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers + + +class 
GitHubPRCreatorTool(ComponentTool): + """ + A tool for creating pull requests in GitHub repositories. + """ + + def __init__( + self, + *, + name: Optional[str] = "pr_creator", + description: Optional[str] = PR_CREATOR_PROMPT, + parameters: Optional[Dict[str, Any]] = PR_CREATOR_SCHEMA, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + ): + """ + Initialize the GitHub PR creator tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } + """ + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state + + pr_creator = GitHubPRCreator( + github_token=github_token, + raise_on_failure=raise_on_failure, + ) + super().__init__( + component=pr_creator, + name=name, + description=description, + parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. + """ + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "raise_on_failure": self.raise_on_failure, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return {"type": generate_qualified_class_name(type(self)), "data": serialized} + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreatorTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. 
+ """ + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py new file mode 100644 index 0000000000..43a0b0c77a --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -0,0 +1,138 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Callable, Dict, Optional, Union + +from haystack.core.serialization import generate_qualified_class_name +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer +from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, message_handler, serialize_handlers + + +class GitHubRepoViewerTool(ComponentTool): + """ + A tool for viewing files and directories in GitHub repositories. + """ + + def __init__( + self, + *, + name: Optional[str] = "repo_viewer", + description: Optional[str] = REPO_VIEWER_PROMPT, + parameters: Optional[Dict[str, Any]] = REPO_VIEWER_SCHEMA, + github_token: Optional[Secret] = None, + repo: Optional[str] = None, + branch: str = "main", + raise_on_failure: bool = True, + max_file_size: int = 1_000_000, # 1MB default limit + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + ): + """ + Initialize the GitHub repository viewer tool. + + :param name: Optional name for the tool. 
+ :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: Optional GitHub personal access token for API authentication + :param repo: Default repository in owner/repo format + :param branch: Default branch to work with + :param raise_on_failure: If True, raises exceptions on API errors + :param max_file_size: Maximum file size in bytes to read + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + By default, truncates the document.content of the viewed files to 150,000 characters each. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + By default, the tool does not use any inputs from state. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + By default, outputs the viewed files as documents to the state. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } + """ + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.repo = repo + self.branch = branch + self.raise_on_failure = raise_on_failure + self.max_file_size = max_file_size + + # Set default values for mutable parameters + self.outputs_to_string = outputs_to_string or {"source": "documents", "handler": message_handler} + self.inputs_from_state = inputs_from_state or {} + self.outputs_to_state = outputs_to_state or {"documents": {"source": "documents"}} + + repo_viewer = GitHubRepoViewer( + github_token=github_token, + repo=repo, + branch=branch, + raise_on_failure=raise_on_failure, + max_file_size=max_file_size, + ) + super().__init__( + component=repo_viewer, + name=name, + description=description, + parameters=parameters, + outputs_to_string=self.outputs_to_string, + inputs_from_state=self.inputs_from_state, + outputs_to_state=self.outputs_to_state, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. + """ + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "repo": self.repo, + "branch": self.branch, + "raise_on_failure": self.raise_on_failure, + "max_file_size": self.max_file_size, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return {"type": generate_qualified_class_name(type(self)), "data": serialized} + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. 
+ """ + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/src/haystack_integrations/tools/github/utils.py b/integrations/github/src/haystack_integrations/tools/github/utils.py new file mode 100644 index 0000000000..0d4c743302 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/utils.py @@ -0,0 +1,73 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Callable, Dict, List, Union + +from haystack import Document +from haystack.utils.callable_serialization import deserialize_callable, serialize_callable + + +def message_handler(documents: List[Document], max_length: int = 150_000) -> str: + """ + Handles the tool output before conversion to ChatMessage. + + :param documents: List of Document objects + :param max_length: Maximum number of characters of the result string + :returns: + String representation of the documents. + """ + result_str = "" + for document in documents: + if document.meta["type"] in ["file", "dir", "error"]: + result_str += document.content + "\n" + else: + result_str += f"File Content for {document.meta['path']}\n\n" + result_str += document.content + + if len(result_str) > max_length: + result_str = result_str[:max_length] + "...(large file can't be fully displayed)" + + return result_str + + +def serialize_handlers( + serialized: Dict[str, Any], + outputs_to_state: Dict[str, Dict[str, Union[str, Callable]]], + outputs_to_string: Dict[str, Union[str, Callable[[Any], str]]], +) -> None: + """ + Serializes callable handlers in outputs_to_state and outputs_to_string. 
+ + :param serialized: The dictionary to update with serialized handlers + :param outputs_to_state: Dictionary containing state output configurations + :param outputs_to_string: Dictionary containing string output configuration + """ + if outputs_to_state is not None: + serialized_outputs = {} + for key, config in outputs_to_state.items(): + serialized_config = config.copy() + if "handler" in config: + serialized_config["handler"] = serialize_callable(config["handler"]) + serialized_outputs[key] = serialized_config + serialized["outputs_to_state"] = serialized_outputs + + if outputs_to_string is not None and "handler" in outputs_to_string: + serialized_string = outputs_to_string.copy() + serialized_string["handler"] = serialize_callable(outputs_to_string["handler"]) + serialized["outputs_to_string"] = serialized_string + + +def deserialize_handlers(data: Dict[str, Any]) -> None: + """ + Deserializes callable handlers in outputs_to_state and outputs_to_string. + + :param data: The dictionary containing serialized handlers to deserialize + """ + if data.get("outputs_to_state"): + for config in data["outputs_to_state"].values(): + if "handler" in config: + config["handler"] = deserialize_callable(config["handler"]) + + if "outputs_to_string" in data and data["outputs_to_string"] and "handler" in data["outputs_to_string"]: + data["outputs_to_string"]["handler"] = deserialize_callable(data["outputs_to_string"]["handler"]) diff --git a/integrations/github/tests/__init__.py b/integrations/github/tests/__init__.py new file mode 100644 index 0000000000..d391382c6b --- /dev/null +++ b/integrations/github/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2025-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py new file mode 100644 index 0000000000..a52d879d14 --- /dev/null +++ b/integrations/github/tests/test_file_editor.py @@ -0,0 +1,271 @@ +# 
SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +import requests +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.file_editor import Command, GitHubFileEditor + + +class TestGitHubFileEditor: + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + editor = GitHubFileEditor() + assert editor.github_token is not None + assert editor.github_token.resolve_value() == "test-token" + assert editor.default_repo is None + assert editor.default_branch == "main" + assert editor.raise_on_failure is True + + def test_init_with_parameters(self): + token = Secret.from_token("test-token") + editor = GitHubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) + assert editor.github_token == token + assert editor.default_repo == "owner/repo" + assert editor.default_branch == "feature" + assert editor.raise_on_failure is False + + with pytest.raises(TypeError): + GitHubFileEditor(github_token="not_a_secret") + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + token = Secret.from_env_var("ENV_VAR") + + editor = GitHubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) + + data = editor.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.file_editor.GitHubFileEditor", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "repo": "owner/repo", + "branch": "feature", + "raise_on_failure": False, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + data = { + "type": "haystack_integrations.components.connectors.github.file_editor.GitHubFileEditor", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "repo": 
"owner/repo", + "branch": "feature", + "raise_on_failure": False, + }, + } + + editor = GitHubFileEditor.from_dict(data) + + assert editor.github_token == Secret.from_env_var("ENV_VAR") + assert editor.default_repo == "owner/repo" + assert editor.default_branch == "feature" + assert editor.raise_on_failure is False + + @patch("requests.get") + @patch("requests.put") + def test_run_edit(self, mock_put, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.return_value.json.return_value = { + "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" + "sha": "abc123", + } + mock_get.return_value.raise_for_status.return_value = None + mock_put.return_value.raise_for_status.return_value = None + + editor = GitHubFileEditor() + + result = editor.run( + command=Command.EDIT, + payload={"path": "test.txt", "original": "Hello", "replacement": "Hi", "message": "Update greeting"}, + repo="owner/repo", + branch="main", + ) + + assert result["result"] == "Edit successful" + + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/test.txt", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubFileEditor", + "Authorization": "Bearer test-token", + }, + params={"ref": "main"}, + timeout=10, + ) + + mock_put.assert_called_once() + put_call = mock_put.call_args + assert put_call[0][0] == "https://api.github.com/repos/owner/repo/contents/test.txt" + assert put_call[1]["json"]["message"] == "Update greeting" + assert put_call[1]["json"]["sha"] == "abc123" + assert put_call[1]["json"]["branch"] == "main" + + @patch("requests.get") + @patch("requests.patch") + def test_run_undo(self, mock_patch, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def 
raise_for_status(self): + if self.status_code >= 400: + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) + + return MockResponse(json_data, status_code) + + get_responses = { + "https://api.github.com/user": create_mock_response({"login": "testuser"}), + "https://api.github.com/repos/owner/repo/commits": create_mock_response( + [{"author": {"login": "testuser"}, "sha": "abc123"}, {"author": {"login": "testuser"}, "sha": "def456"}] + ), + } + + def get_side_effect(url, **_): + return get_responses.get(url, create_mock_response({})) + + mock_get.side_effect = get_side_effect + + mock_patch.return_value.raise_for_status.return_value = None + + editor = GitHubFileEditor() + + result = editor.run( + command=Command.UNDO, payload={"message": "Undo last change"}, repo="owner/repo", branch="main" + ) + + assert result["result"] == "Successfully undid last change" + + assert mock_get.call_count == 3 # One for commits, one for user info, one for last commit check + mock_patch.assert_called_once_with( + "https://api.github.com/repos/owner/repo/git/refs/heads/main", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubFileEditor", + "Authorization": "Bearer test-token", + }, + json={"sha": "def456", "force": True}, + timeout=10, + ) + + @patch("requests.put") + def test_run_create(self, mock_put, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_put.return_value.raise_for_status.return_value = None + + editor = GitHubFileEditor() + + result = editor.run( + command=Command.CREATE, + payload={"path": "new.txt", "content": "New file content", "message": "Create new file"}, + repo="owner/repo", + branch="main", + ) + + assert result["result"] == "File created successfully" + + mock_put.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/new.txt", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubFileEditor", + 
"Authorization": "Bearer test-token", + }, + json={ + "message": "Create new file", + "content": "TmV3IGZpbGUgY29udGVudA==", # Base64 encoded "New file content" + "branch": "main", + }, + timeout=10, + ) + + @patch("requests.get") + @patch("requests.delete") + def test_run_delete(self, mock_delete, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.return_value.json.return_value = { + "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" + "sha": "abc123", + } + mock_get.return_value.raise_for_status.return_value = None + + mock_delete.return_value.raise_for_status.return_value = None + + editor = GitHubFileEditor() + + result = editor.run( + command=Command.DELETE, + payload={"path": "test.txt", "message": "Delete file"}, + repo="owner/repo", + branch="main", + ) + + assert result["result"] == "File deleted successfully" + + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/test.txt", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubFileEditor", + "Authorization": "Bearer test-token", + }, + params={"ref": "main"}, + timeout=10, + ) + + mock_delete.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/test.txt", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubFileEditor", + "Authorization": "Bearer test-token", + }, + json={"message": "Delete file", "sha": "abc123", "branch": "main"}, + timeout=10, + ) + + @patch("requests.get") + def test_run_error_handling(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.side_effect = requests.RequestException("API Error") + + editor = GitHubFileEditor(raise_on_failure=False) + + result = editor.run( + command=Command.EDIT, + payload={"path": "test.txt", "original": "Hello", "replacement": "Hi", "message": "Update greeting"}, + repo="owner/repo", + branch="main", + ) + + assert "Error: API Error" in 
result["result"] + + editor = GitHubFileEditor(raise_on_failure=True) + with pytest.raises(requests.RequestException): + editor.run( + command=Command.EDIT, + payload={"path": "test.txt", "original": "Hello", "replacement": "Hi", "message": "Update greeting"}, + repo="owner/repo", + branch="main", + ) diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py new file mode 100644 index 0000000000..f590061786 --- /dev/null +++ b/integrations/github/tests/test_file_editor_tool.py @@ -0,0 +1,223 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack import Pipeline +from haystack.components.agents import Agent +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.utils import Secret + +from haystack_integrations.prompts.github.file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from haystack_integrations.tools.github.file_editor_tool import GitHubFileEditorTool +from haystack_integrations.tools.github.utils import message_handler + + +class TestGitHubFileEditorTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubFileEditorTool() + assert tool.name == "file_editor" + assert tool.description == FILE_EDITOR_PROMPT + assert tool.parameters == FILE_EDITOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", + "data": { + "name": "file_editor", + "description": FILE_EDITOR_PROMPT, + "parameters": FILE_EDITOR_SCHEMA, + "github_token": {"env_vars": 
["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "repo": None, + "branch": "main", + "raise_on_failure": True, + }, + } + tool = GitHubFileEditorTool.from_dict(tool_dict) + assert tool.name == "file_editor" + assert tool.description == FILE_EDITOR_PROMPT + assert tool.parameters == FILE_EDITOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubFileEditorTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool" + assert tool_dict["data"]["name"] == "file_editor" + assert tool_dict["data"]["description"] == FILE_EDITOR_PROMPT + assert tool_dict["data"]["parameters"] == FILE_EDITOR_SCHEMA + assert tool_dict["data"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["data"]["repo"] is None + assert tool_dict["data"]["branch"] == "main" + assert tool_dict["data"]["raise_on_failure"] + assert tool_dict["data"]["outputs_to_string"] is None + assert tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + tool = GitHubFileEditorTool( + outputs_to_string={"source": "result", "handler": message_handler}, + inputs_from_state={"repo_state": "repo"}, + outputs_to_state={"file_content": {"source": "content", "handler": message_handler}}, + ) + + tool_dict = tool.to_dict() + assert tool_dict["data"]["outputs_to_string"] == { + "source": "result", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + assert tool_dict["data"]["inputs_from_state"] == {"repo_state": "repo"} + assert tool_dict["data"]["outputs_to_state"] == { + 
"file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + } + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + tool_dict = { + "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", + "data": { + "name": "file_editor", + "description": FILE_EDITOR_PROMPT, + "parameters": FILE_EDITOR_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "repo": None, + "branch": "main", + "raise_on_failure": True, + "outputs_to_string": { + "source": "result", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + "inputs_from_state": {"repo_state": "repo"}, + "outputs_to_state": { + "file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + }, + }, + } + + tool = GitHubFileEditorTool.from_dict(tool_dict) + assert tool.outputs_to_string["source"] == "result" + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repo_state": "repo"} + assert tool.outputs_to_state["file_content"]["source"] == "content" + assert tool.outputs_to_state["file_content"]["handler"] == message_handler + + def test_pipeline_serialization(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + monkeypatch.setenv("OPENAI_API_KEY", "test-token") + + file_editor = GitHubFileEditorTool() + + agent = Agent( + chat_generator=OpenAIChatGenerator(), + tools=[file_editor], + ) + + pipeline = Pipeline() + pipeline.add_component("agent", agent) + + pipeline_dict = pipeline.to_dict() + + # Remove http_client_kwargs from both dictionaries if it exists + # We don't want to test the http_client_kwargs because Haystack 2.12.0 doesn't have it + # Only Haystack 2.13.0+ has it + if "components" in pipeline_dict: + agent_params = 
pipeline_dict["components"]["agent"]["init_parameters"]["chat_generator"]["init_parameters"] + if "http_client_kwargs" in agent_params: + del agent_params["http_client_kwargs"] + + expected_dict = { + "metadata": {}, + "max_runs_per_component": 100, + "components": { + "agent": { + "type": "haystack.components.agents.agent.Agent", + "init_parameters": { + "chat_generator": { + "type": "haystack.components.generators.chat.openai.OpenAIChatGenerator", + "init_parameters": { + "model": "gpt-4o-mini", + "streaming_callback": None, + "api_base_url": None, + "organization": None, + "generation_kwargs": {}, + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "timeout": None, + "max_retries": None, + "tools": None, + "tools_strict": False, + }, + }, + "tools": [ + { + "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", + "data": { + "name": "file_editor", + "description": FILE_EDITOR_PROMPT, + "parameters": FILE_EDITOR_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "repo": None, + "branch": "main", + "raise_on_failure": True, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, + }, + } + ], + "system_prompt": None, + "exit_conditions": ["text"], + "state_schema": {}, + "max_agent_steps": 100, + "raise_on_tool_invocation_failure": False, + "streaming_callback": None, + }, + } + }, + "connections": [], + "connection_type_validation": True, + } + + assert pipeline_dict == expected_dict + + deserialized_pipeline = Pipeline.from_dict(pipeline_dict) + deserialized_components = [instance for _, instance in deserialized_pipeline.graph.nodes(data="instance")] + deserialized_agent = deserialized_components[0] + assert isinstance(deserialized_agent, Agent) + + agent_tools = deserialized_agent.tools + assert len(agent_tools) == 1 + assert isinstance(agent_tools[0], GitHubFileEditorTool) + assert agent_tools[0].name == "file_editor" + + # 
Verify the tool's parameters were preserved + assert agent_tools[0].name == "file_editor" + assert agent_tools[0].description == FILE_EDITOR_PROMPT + assert agent_tools[0].parameters == FILE_EDITOR_SCHEMA + assert agent_tools[0].github_token == Secret.from_env_var("GITHUB_TOKEN") + assert agent_tools[0].repo is None + assert agent_tools[0].branch == "main" + assert agent_tools[0].raise_on_failure diff --git a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py new file mode 100644 index 0000000000..b187218791 --- /dev/null +++ b/integrations/github/tests/test_issue_commenter.py @@ -0,0 +1,116 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +import requests +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.issue_commenter import GitHubIssueCommenter + + +class TestGitHubIssueCommenter: + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + commenter = GitHubIssueCommenter() + assert commenter.github_token is not None + assert commenter.github_token.resolve_value() == "test-token" + assert commenter.raise_on_failure is True + assert commenter.retry_attempts == 2 + + def test_init_with_parameters(self): + token = Secret.from_token("test-token") + commenter = GitHubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) + assert commenter.github_token == token + assert commenter.raise_on_failure is False + assert commenter.retry_attempts == 3 + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + token = Secret.from_env_var("ENV_VAR") + + commenter = GitHubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) + + data = commenter.to_dict() + + assert data == { + "type": 
"haystack_integrations.components.connectors.github.issue_commenter.GitHubIssueCommenter", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "retry_attempts": 3, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + data = { + "type": "haystack_integrations.components.connectors.github.issue_commenter.GitHubIssueCommenter", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "retry_attempts": 3, + }, + } + + commenter = GitHubIssueCommenter.from_dict(data) + + assert commenter.github_token == Secret.from_env_var("ENV_VAR") + assert commenter.raise_on_failure is False + assert commenter.retry_attempts == 3 + + @patch("requests.post") + def test_run(self, mock_post, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_post.return_value.raise_for_status.return_value = None + + commenter = GitHubIssueCommenter() + + result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") + + assert result["success"] is True + + mock_post.assert_called_once_with( + "https://api.github.com/repos/owner/repo/issues/123/comments", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubIssueCommenter", + "Authorization": "Bearer test-token", + }, + json={"body": "Test comment"}, + timeout=10, + ) + + @patch("requests.post") + def test_run_error_handling(self, mock_post, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_post.side_effect = requests.RequestException("API Error") + + commenter = GitHubIssueCommenter(raise_on_failure=False) + + result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") + + assert result["success"] is False + + commenter = GitHubIssueCommenter(raise_on_failure=True) + with 
pytest.raises(requests.RequestException): + commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") + + def test_parse_github_url(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + commenter = GitHubIssueCommenter() + + owner, repo, issue_number = commenter._parse_github_url("https://github.com/owner/repo/issues/123") + assert owner == "owner" + assert repo == "repo" + assert issue_number == 123 + + with pytest.raises(ValueError): + commenter._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_issue_commenter_tool.py b/integrations/github/tests/test_issue_commenter_tool.py new file mode 100644 index 0000000000..2a4c19a4f7 --- /dev/null +++ b/integrations/github/tests/test_issue_commenter_tool.py @@ -0,0 +1,132 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack.utils import Secret + +from haystack_integrations.prompts.github.issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA +from haystack_integrations.tools.github.issue_commenter_tool import GitHubIssueCommenterTool +from haystack_integrations.tools.github.utils import message_handler + + +class TestGitHubIssueCommenterTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueCommenterTool() + assert tool.name == "issue_commenter" + assert tool.description == ISSUE_COMMENTER_PROMPT + assert tool.parameters == ISSUE_COMMENTER_SCHEMA + assert tool.retry_attempts == 2 + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool", + "data": { + "name": "issue_commenter", + "description": ISSUE_COMMENTER_PROMPT, + "parameters": 
ISSUE_COMMENTER_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "raise_on_failure": True, + "retry_attempts": 2, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, + }, + } + tool = GitHubIssueCommenterTool.from_dict(tool_dict) + assert tool.name == "issue_commenter" + assert tool.description == ISSUE_COMMENTER_PROMPT + assert tool.parameters == ISSUE_COMMENTER_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure + assert tool.retry_attempts == 2 + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueCommenterTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool" + assert tool_dict["data"]["name"] == "issue_commenter" + assert tool_dict["data"]["description"] == ISSUE_COMMENTER_PROMPT + assert tool_dict["data"]["parameters"] == ISSUE_COMMENTER_SCHEMA + assert tool_dict["data"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["data"]["raise_on_failure"] + assert tool_dict["data"]["retry_attempts"] == 2 + assert tool_dict["data"]["outputs_to_string"] is None + assert tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueCommenterTool( + name="test_issue_commenter", + description="Test description", + parameters={"type": "object", "properties": {}}, + github_token=None, + raise_on_failure=False, + retry_attempts=3, + outputs_to_string={"handler": message_handler}, + inputs_from_state={"repository": "repo"}, + 
outputs_to_state={"documents": {"source": "docs", "handler": message_handler}}, + ) + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool" + assert tool_dict["data"]["name"] == "test_issue_commenter" + assert tool_dict["data"]["description"] == "Test description" + assert tool_dict["data"]["parameters"] == {"type": "object", "properties": {}} + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["raise_on_failure"] is False + assert tool_dict["data"]["retry_attempts"] == 3 + assert ( + tool_dict["data"]["outputs_to_string"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + assert tool_dict["data"]["inputs_from_state"] == {"repository": "repo"} + assert tool_dict["data"]["outputs_to_state"]["documents"]["source"] == "docs" + assert ( + tool_dict["data"]["outputs_to_state"]["documents"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool", + "data": { + "name": "test_issue_commenter", + "description": "Test description", + "parameters": {"type": "object", "properties": {}}, + "github_token": None, + "raise_on_failure": False, + "retry_attempts": 3, + "outputs_to_string": {"handler": "haystack_integrations.tools.github.utils.message_handler"}, + "inputs_from_state": {"repository": "repo"}, + "outputs_to_state": { + "documents": { + "source": "docs", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + }, + }, + } + tool = GitHubIssueCommenterTool.from_dict(tool_dict) + assert tool.name == "test_issue_commenter" + assert tool.description == "Test description" + assert tool.parameters == {"type": "object", "properties": {}} + assert tool.github_token is None + 
assert tool.raise_on_failure is False + assert tool.retry_attempts == 3 + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repository": "repo"} + assert tool.outputs_to_state["documents"]["source"] == "docs" + assert tool.outputs_to_state["documents"]["handler"] == message_handler diff --git a/integrations/github/tests/test_issue_viewer.py b/integrations/github/tests/test_issue_viewer.py new file mode 100644 index 0000000000..9e66bfbc6c --- /dev/null +++ b/integrations/github/tests/test_issue_viewer.py @@ -0,0 +1,152 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +import requests +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.issue_viewer import GitHubIssueViewer + + +class TestGitHubIssueViewer: + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + viewer = GitHubIssueViewer() + assert viewer.github_token is None + assert viewer.raise_on_failure is True + assert viewer.retry_attempts == 2 + + def test_init_with_parameters(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + token = Secret.from_env_var("GITHUB_TOKEN") + viewer = GitHubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) + assert viewer.github_token == token + assert viewer.raise_on_failure is False + assert viewer.retry_attempts == 3 + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + token = Secret.from_env_var("ENV_VAR") + + viewer = GitHubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) + + data = viewer.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.issue_viewer.GitHubIssueViewer", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": 
False, + "retry_attempts": 3, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + data = { + "type": "haystack_integrations.components.connectors.github.issue_viewer.GitHubIssueViewer", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "retry_attempts": 3, + }, + } + + viewer = GitHubIssueViewer.from_dict(data) + + assert viewer.github_token == Secret.from_env_var("ENV_VAR") + assert viewer.raise_on_failure is False + assert viewer.retry_attempts == 3 + + @patch("requests.get") + def test_run(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.return_value.json.return_value = { + "body": "Issue body", + "title": "Issue title", + "number": 123, + "state": "open", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-02T00:00:00Z", + "user": {"login": "test_user"}, + "html_url": "https://github.com/owner/repo/issues/123", + "comments": 2, + "comments_url": "https://api.github.com/repos/owner/repo/issues/123/comments", + } + mock_get.return_value.raise_for_status.return_value = None + + mock_get.side_effect = [ + mock_get.return_value, # First call for issue + type( + "Response", + (), + { + "json": lambda: [ + { + "body": "Comment 1", + "created_at": "2023-01-01T01:00:00Z", + "updated_at": "2023-01-01T01:00:00Z", + "user": {"login": "commenter1"}, + "html_url": "https://github.com/owner/repo/issues/123#issuecomment-1", + }, + { + "body": "Comment 2", + "created_at": "2023-01-01T02:00:00Z", + "updated_at": "2023-01-01T02:00:00Z", + "user": {"login": "commenter2"}, + "html_url": "https://github.com/owner/repo/issues/123#issuecomment-2", + }, + ], + "raise_for_status": lambda: None, + }, + ), + ] + + viewer = GitHubIssueViewer() + + result = viewer.run(url="https://github.com/owner/repo/issues/123") + + assert len(result["documents"]) == 3 # 1 issue + 2 comments + assert 
result["documents"][0].meta["type"] == "issue" + assert result["documents"][1].meta["type"] == "comment" + assert result["documents"][2].meta["type"] == "comment" + + assert mock_get.call_count == 2 + + @patch("requests.get") + def test_run_error_handling(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.side_effect = requests.RequestException("API Error") + + viewer = GitHubIssueViewer(raise_on_failure=False) + + result = viewer.run(url="https://github.com/owner/repo/issues/123") + + assert len(result["documents"]) == 1 + assert result["documents"][0].meta["type"] == "error" + assert result["documents"][0].meta["error"] is True + + viewer = GitHubIssueViewer(raise_on_failure=True) + with pytest.raises(requests.RequestException): + viewer.run(url="https://github.com/owner/repo/issues/123") + + def test_parse_github_url(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + viewer = GitHubIssueViewer() + + owner, repo, issue_number = viewer._parse_github_url("https://github.com/owner/repo/issues/123") + assert owner == "owner" + assert repo == "repo" + assert issue_number == 123 + + with pytest.raises(ValueError): + viewer._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_issue_viewer_tool.py b/integrations/github/tests/test_issue_viewer_tool.py new file mode 100644 index 0000000000..8edb7b8a15 --- /dev/null +++ b/integrations/github/tests/test_issue_viewer_tool.py @@ -0,0 +1,128 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack_integrations.prompts.github.issue_viewer_prompt import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA +from haystack_integrations.tools.github.issue_viewer_tool import GitHubIssueViewerTool +from haystack_integrations.tools.github.utils import message_handler + + +class TestGitHubIssueViewerTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", 
"test-token") + tool = GitHubIssueViewerTool() + assert tool.name == "issue_viewer" + assert tool.description == ISSUE_VIEWER_PROMPT + assert tool.parameters == ISSUE_VIEWER_SCHEMA + assert tool.github_token is None + assert tool.raise_on_failure is True + assert tool.retry_attempts == 2 + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool", + "data": { + "name": "test_issue_viewer", + "description": "Test description", + "parameters": {"type": "object", "properties": {}}, + "github_token": None, + "raise_on_failure": True, + "retry_attempts": 2, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, + }, + } + tool = GitHubIssueViewerTool.from_dict(tool_dict) + assert tool.name == "test_issue_viewer" + assert tool.description == "Test description" + assert tool.parameters == {"type": "object", "properties": {}} + assert tool.github_token is None + assert tool.raise_on_failure + assert tool.retry_attempts == 2 + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueViewerTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool" + assert tool_dict["data"]["name"] == "issue_viewer" + assert tool_dict["data"]["description"] == ISSUE_VIEWER_PROMPT + assert tool_dict["data"]["parameters"] == ISSUE_VIEWER_SCHEMA + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["raise_on_failure"] is True + assert tool_dict["data"]["retry_attempts"] == 2 + assert tool_dict["data"]["outputs_to_string"] is None + assert 
tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueViewerTool( + name="test_issue_viewer", + description="Test description", + parameters={"type": "object", "properties": {}}, + github_token=None, + raise_on_failure=False, + retry_attempts=3, + outputs_to_string={"handler": message_handler}, + inputs_from_state={"repository": "repo"}, + outputs_to_state={"documents": {"source": "docs", "handler": message_handler}}, + ) + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool" + assert tool_dict["data"]["name"] == "test_issue_viewer" + assert tool_dict["data"]["description"] == "Test description" + assert tool_dict["data"]["parameters"] == {"type": "object", "properties": {}} + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["raise_on_failure"] is False + assert tool_dict["data"]["retry_attempts"] == 3 + assert ( + tool_dict["data"]["outputs_to_string"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + assert tool_dict["data"]["inputs_from_state"] == {"repository": "repo"} + assert tool_dict["data"]["outputs_to_state"]["documents"]["source"] == "docs" + assert ( + tool_dict["data"]["outputs_to_state"]["documents"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool", + "data": { + "name": "test_issue_viewer", + "description": "Test description", + "parameters": {"type": "object", "properties": {}}, + "github_token": None, + "raise_on_failure": False, + "retry_attempts": 3, + "outputs_to_string": {"handler": 
"haystack_integrations.tools.github.utils.message_handler"}, + "inputs_from_state": {"repository": "repo"}, + "outputs_to_state": { + "documents": { + "source": "docs", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + }, + }, + } + tool = GitHubIssueViewerTool.from_dict(tool_dict) + assert tool.name == "test_issue_viewer" + assert tool.description == "Test description" + assert tool.parameters == {"type": "object", "properties": {}} + assert tool.github_token is None + assert tool.raise_on_failure is False + assert tool.retry_attempts == 3 + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repository": "repo"} + assert tool.outputs_to_state["documents"]["source"] == "docs" + assert tool.outputs_to_state["documents"]["handler"] == message_handler diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py new file mode 100644 index 0000000000..c27d19a942 --- /dev/null +++ b/integrations/github/tests/test_pr_creator.py @@ -0,0 +1,134 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +import requests +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.pr_creator import GitHubPRCreator + + +class TestGitHubPRCreator: + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + pr_creator = GitHubPRCreator() + assert pr_creator.github_token is not None + assert pr_creator.github_token.resolve_value() == "test-token" + assert pr_creator.raise_on_failure is True + + def test_init_with_parameters(self): + token = Secret.from_token("test-token") + pr_creator = GitHubPRCreator(github_token=token, raise_on_failure=False) + assert pr_creator.github_token == token + assert pr_creator.raise_on_failure is False + + with pytest.raises(TypeError): + 
GitHubPRCreator(github_token="not_a_secret") + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + token = Secret.from_env_var("ENV_VAR") + + pr_creator = GitHubPRCreator(github_token=token, raise_on_failure=False) + + data = pr_creator.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.pr_creator.GitHubPRCreator", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + data = { + "type": "haystack_integrations.components.connectors.github.pr_creator.GitHubPRCreator", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + }, + } + + pr_creator = GitHubPRCreator.from_dict(data) + + assert pr_creator.github_token == Secret.from_env_var("ENV_VAR") + assert pr_creator.raise_on_failure is False + + @patch("requests.get") + @patch("requests.post") + def test_run(self, mock_post, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.return_value.json.return_value = {"login": "test_user"} + mock_get.return_value.raise_for_status.return_value = None + + mock_post.return_value.json.return_value = {"number": 123} + mock_post.return_value.raise_for_status.return_value = None + + pr_creator = GitHubPRCreator() + + with patch.object(pr_creator, "_check_fork_exists", return_value=True): + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", + body="Test body", + draft=False, + ) + + assert result["result"] == "Pull request #123 created successfully and linked to issue #456" + + mock_get.assert_called_once() + mock_post.assert_called_once_with( + "https://api.github.com/repos/owner/repo/pulls", + headers={ + "Accept": 
"application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubPRCreator", + "Authorization": "Bearer test-token", + }, + json={ + "title": "Test PR", + "body": "Test body", + "head": "test_user:feature-branch", + "base": "main", + "draft": False, + "maintainer_can_modify": True, + }, + timeout=10, + ) + + @patch("requests.get") + @patch("requests.post") + def test_run_error_handling(self, _, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.side_effect = requests.RequestException("API Error") + + pr_creator = GitHubPRCreator(raise_on_failure=False) + + with patch.object(pr_creator, "_check_fork_exists", return_value=True): + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", + ) + + assert "Error" in result["result"] + + pr_creator = GitHubPRCreator(raise_on_failure=True) + with pytest.raises(requests.RequestException): + pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", + ) diff --git a/integrations/github/tests/test_pr_creator_tool.py b/integrations/github/tests/test_pr_creator_tool.py new file mode 100644 index 0000000000..fd3e2d4499 --- /dev/null +++ b/integrations/github/tests/test_pr_creator_tool.py @@ -0,0 +1,130 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack.utils import Secret + +from haystack_integrations.prompts.github.pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA +from haystack_integrations.tools.github.pr_creator_tool import GitHubPRCreatorTool +from haystack_integrations.tools.github.utils import message_handler + + +class TestGitHubPRCreatorTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubPRCreatorTool() + assert tool.name == "pr_creator" + assert tool.description == PR_CREATOR_PROMPT + assert 
tool.parameters == PR_CREATOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure is True + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool", + "data": { + "name": "pr_creator", + "description": PR_CREATOR_PROMPT, + "parameters": PR_CREATOR_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "raise_on_failure": True, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, + }, + } + tool = GitHubPRCreatorTool.from_dict(tool_dict) + assert tool.name == "pr_creator" + assert tool.description == PR_CREATOR_PROMPT + assert tool.parameters == PR_CREATOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure is True + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubPRCreatorTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool" + assert tool_dict["data"]["name"] == "pr_creator" + assert tool_dict["data"]["description"] == PR_CREATOR_PROMPT + assert tool_dict["data"]["parameters"] == PR_CREATOR_SCHEMA + assert tool_dict["data"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["data"]["raise_on_failure"] is True + assert tool_dict["data"]["outputs_to_string"] is None + assert tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None + + def test_to_dict_with_extra_params(self, 
monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubPRCreatorTool( + name="pr_creator", + description="PR Creator Tool", + parameters=PR_CREATOR_SCHEMA, + github_token=Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure=False, + outputs_to_string={"handler": message_handler}, + inputs_from_state={"repository": "repo"}, + outputs_to_state={"documents": {"source": "docs", "handler": message_handler}}, + ) + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool" + assert tool_dict["data"]["name"] == "pr_creator" + assert tool_dict["data"]["description"] == "PR Creator Tool" + assert tool_dict["data"]["parameters"] == PR_CREATOR_SCHEMA + assert tool_dict["data"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["data"]["raise_on_failure"] is False + assert ( + tool_dict["data"]["outputs_to_string"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + assert tool_dict["data"]["inputs_from_state"] == {"repository": "repo"} + assert tool_dict["data"]["outputs_to_state"]["documents"]["source"] == "docs" + assert ( + tool_dict["data"]["outputs_to_state"]["documents"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool", + "data": { + "name": "pr_creator", + "description": "PR Creator Tool", + "parameters": PR_CREATOR_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "outputs_to_string": {"handler": "haystack_integrations.tools.github.utils.message_handler"}, + "inputs_from_state": {"repository": "repo"}, + "outputs_to_state": { + "documents": { + "source": "docs", + 
"handler": "haystack_integrations.tools.github.utils.message_handler", + } + }, + }, + } + tool = GitHubPRCreatorTool.from_dict(tool_dict) + assert tool.name == "pr_creator" + assert tool.description == "PR Creator Tool" + assert tool.parameters == PR_CREATOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure is False + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repository": "repo"} + assert tool.outputs_to_state["documents"]["source"] == "docs" + assert tool.outputs_to_state["documents"]["handler"] == message_handler diff --git a/integrations/github/tests/test_repo_forker.py b/integrations/github/tests/test_repo_forker.py new file mode 100644 index 0000000000..6355a3bd53 --- /dev/null +++ b/integrations/github/tests/test_repo_forker.py @@ -0,0 +1,263 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +import requests +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.repo_forker import GitHubRepoForker + + +class TestGitHubRepoForker: + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + forker = GitHubRepoForker() + assert forker.github_token is not None + assert forker.github_token.resolve_value() == "test-token" + assert forker.raise_on_failure is True + assert forker.wait_for_completion is False + assert forker.max_wait_seconds == 300 + assert forker.poll_interval == 2 + assert forker.auto_sync is True + assert forker.create_branch is True + + def test_init_with_parameters(self): + token = Secret.from_token("test-token") + forker = GitHubRepoForker( + github_token=token, + raise_on_failure=False, + wait_for_completion=True, + max_wait_seconds=60, + poll_interval=1, + auto_sync=False, + create_branch=False, + ) + assert forker.github_token == token + assert 
forker.raise_on_failure is False + assert forker.wait_for_completion is True + assert forker.max_wait_seconds == 60 + assert forker.poll_interval == 1 + assert forker.auto_sync is False + assert forker.create_branch is False + + # Test with invalid token type + with pytest.raises(TypeError): + GitHubRepoForker(github_token="not_a_secret") + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + token = Secret.from_env_var("ENV_VAR") + + forker = GitHubRepoForker( + github_token=token, + raise_on_failure=False, + wait_for_completion=True, + max_wait_seconds=60, + poll_interval=1, + auto_sync=False, + create_branch=False, + ) + + data = forker.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.repo_forker.GitHubRepoForker", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "wait_for_completion": True, + "max_wait_seconds": 60, + "poll_interval": 1, + "auto_sync": False, + "create_branch": False, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + data = { + "type": "haystack_integrations.components.connectors.github.repo_forker.GitHubRepoForker", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "wait_for_completion": True, + "max_wait_seconds": 60, + "poll_interval": 1, + "auto_sync": False, + "create_branch": False, + }, + } + + forker = GitHubRepoForker.from_dict(data) + + assert forker.github_token == Secret.from_env_var("ENV_VAR") + assert forker.raise_on_failure is False + assert forker.wait_for_completion is True + assert forker.max_wait_seconds == 60 + assert forker.poll_interval == 1 + assert forker.auto_sync is False + assert forker.create_branch is False + + @patch("requests.get") + @patch("requests.post") + def test_run_create_fork(self, mock_post, mock_get, 
monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def raise_for_status(self): + if self.status_code >= 400: + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) + + return MockResponse(json_data, status_code) + + get_responses = { + "https://api.github.com/user": create_mock_response({"login": "test_user"}), + "https://api.github.com/repos/test_user/repo": create_mock_response( + {}, status_code=404 + ), # Fork doesn't exist + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( + {"object": {"sha": "abc123"}} + ), + } + + def get_side_effect(url, **_): + if url == "https://api.github.com/repos/test_user/repo": + if mock_get.call_count == 2: + return create_mock_response({}, status_code=404) # Fork doesn't exist + return create_mock_response({"default_branch": "main"}) + return get_responses.get(url, create_mock_response({"default_branch": "main"})) + + mock_get.side_effect = get_side_effect + + def post_side_effect(url, **_): + if "forks" in url: + return create_mock_response({"owner": {"login": "test_user"}, "name": "repo"}) + return create_mock_response({}) + + mock_post.side_effect = post_side_effect + + forker = GitHubRepoForker(create_branch=True, auto_sync=False) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "test_user/repo" + assert result["issue_branch"] == "fix-123" + + assert mock_get.call_count == 5 # user (2x), check fork status, get default branch, get SHA + + get_calls = [call[0][0] for call in mock_get.call_args_list] + assert get_calls.count("https://api.github.com/user") == 2 # get user, check fork + assert get_calls.count("https://api.github.com/repos/test_user/repo") == 2 # check status, get default 
branch + assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls + + post_calls = [call[0][0] for call in mock_post.call_args_list] + assert "https://api.github.com/repos/owner/repo/forks" in post_calls + assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls + assert mock_post.call_count == 2 # One for fork creation, one for branch creation + + @patch("requests.get") + @patch("requests.post") + def test_run_sync_existing_fork(self, mock_post, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def raise_for_status(self): + if self.status_code >= 400: + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) + + return MockResponse(json_data, status_code) + + get_responses = { + "https://api.github.com/user": create_mock_response({"login": "test_user"}), + "https://api.github.com/repos/test_user/repo": create_mock_response( + {"name": "repo", "default_branch": "main"} + ), + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( + {"object": {"sha": "abc123"}} + ), + } + + def get_side_effect(url, **_): + return get_responses.get(url, create_mock_response({"default_branch": "main"})) + + mock_get.side_effect = get_side_effect + + def post_side_effect(url, **_): + if "merge-upstream" in url: + return create_mock_response({}) + return create_mock_response({}) + + mock_post.side_effect = post_side_effect + + forker = GitHubRepoForker(create_branch=True, auto_sync=True) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "test_user/repo" + assert result["issue_branch"] == "fix-123" + + assert mock_get.call_count == 5 # user, check fork, check fork status, get default branch, get 
SHA + + get_calls = [call[0][0] for call in mock_get.call_args_list] + assert "https://api.github.com/user" in get_calls + assert "https://api.github.com/repos/test_user/repo" in get_calls + assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls + + post_calls = [call[0][0] for call in mock_post.call_args_list] + assert "https://api.github.com/repos/test_user/repo/merge-upstream" in post_calls + assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls + assert mock_post.call_count == 2 # One for sync, one for branch creation + + @patch("requests.get") + @patch("requests.post") + def test_run_error_handling(self, _, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.side_effect = requests.RequestException("API Error") + + forker = GitHubRepoForker(raise_on_failure=False) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "" + assert result["issue_branch"] is None + + forker = GitHubRepoForker(raise_on_failure=True) + with pytest.raises(requests.RequestException): + forker.run(url="https://github.com/owner/repo/issues/123") + + def test_parse_github_url(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + forker = GitHubRepoForker() + + owner, repo, issue_number = forker._parse_github_url("https://github.com/owner/repo/issues/123") + assert owner == "owner" + assert repo == "repo" + assert issue_number == "123" + + with pytest.raises(ValueError): + forker._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py new file mode 100644 index 0000000000..4bfdda1d74 --- /dev/null +++ b/integrations/github/tests/test_repo_viewer.py @@ -0,0 +1,176 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +import requests +from 
haystack.utils import Secret + +from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer + + +class TestGitHubRepoViewer: + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + viewer = GitHubRepoViewer() + assert viewer.github_token is None + assert viewer.raise_on_failure is True + assert viewer.max_file_size == 1_000_000 + assert viewer.repo is None + assert viewer.branch == "main" + + def test_init_with_parameters(self): + token = Secret.from_token("test-token") + viewer = GitHubRepoViewer( + github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="test-branch" + ) + assert viewer.github_token == token + assert viewer.raise_on_failure is False + assert viewer.max_file_size == 500_000 + assert viewer.repo == "owner/repo" + assert viewer.branch == "test-branch" + + with pytest.raises(TypeError): + GitHubRepoViewer(github_token="not_a_secret") + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + token = Secret.from_env_var("ENV_VAR") + + viewer = GitHubRepoViewer( + github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="test-branch" + ) + + data = viewer.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepoViewer", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "max_file_size": 500_000, + "repo": "owner/repo", + "branch": "test-branch", + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + data = { + "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepoViewer", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "max_file_size": 500_000, + "repo": "owner/repo", + "branch": 
"test-branch", + }, + } + + viewer = GitHubRepoViewer.from_dict(data) + + assert viewer.github_token == Secret.from_env_var("ENV_VAR") + assert viewer.raise_on_failure is False + assert viewer.max_file_size == 500_000 + assert viewer.repo == "owner/repo" + assert viewer.branch == "test-branch" + + @patch("requests.get") + def test_run_file(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.return_value.json.return_value = { + "name": "README.md", + "path": "README.md", + "size": 100, + "html_url": "https://github.com/owner/repo/blob/main/README.md", + "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" + "encoding": "base64", + } + mock_get.return_value.raise_for_status.return_value = None + + viewer = GitHubRepoViewer() + + result = viewer.run(repo="owner/repo", path="README.md", branch="main") + + assert len(result["documents"]) == 1 + assert result["documents"][0].content == "Hello World" + assert result["documents"][0].meta["type"] == "file_content" + assert result["documents"][0].meta["path"] == "README.md" + + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/README.md?ref=main", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubRepoViewer", + }, + timeout=10, + ) + + @patch("requests.get") + def test_run_directory(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.return_value.json.return_value = [ + {"name": "docs", "path": "docs", "type": "dir", "html_url": "https://github.com/owner/repo/tree/main/docs"}, + { + "name": "README.md", + "path": "README.md", + "type": "file", + "size": 100, + "html_url": "https://github.com/owner/repo/blob/main/README.md", + }, + ] + mock_get.return_value.raise_for_status.return_value = None + + viewer = GitHubRepoViewer() + + result = viewer.run(repo="owner/repo", path="", branch="main") + + assert len(result["documents"]) == 2 + assert 
result["documents"][0].content == "docs" + assert result["documents"][0].meta["type"] == "dir" + assert result["documents"][1].content == "README.md" + assert result["documents"][1].meta["type"] == "file" + + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/?ref=main", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubRepoViewer", + }, + timeout=10, + ) + + @patch("requests.get") + def test_run_error_handling(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.side_effect = requests.RequestException("API Error") + + viewer = GitHubRepoViewer(raise_on_failure=False) + + result = viewer.run(repo="owner/repo", path="README.md", branch="main") + + assert len(result["documents"]) == 1 + assert result["documents"][0].meta["type"] == "error" + + viewer = GitHubRepoViewer(raise_on_failure=True) + with pytest.raises(requests.RequestException): + viewer.run(repo="owner/repo", path="README.md", branch="main") + + def test_parse_repo(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + viewer = GitHubRepoViewer() + + owner, repo = viewer._parse_repo("owner/repo") + assert owner == "owner" + assert repo == "repo" + + with pytest.raises(ValueError): + viewer._parse_repo("invalid_format") diff --git a/integrations/github/tests/test_repo_viewer_tool.py b/integrations/github/tests/test_repo_viewer_tool.py new file mode 100644 index 0000000000..d83a7a68d3 --- /dev/null +++ b/integrations/github/tests/test_repo_viewer_tool.py @@ -0,0 +1,135 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA +from haystack_integrations.tools.github.repo_viewer_tool import GitHubRepoViewerTool +from haystack_integrations.tools.github.utils import message_handler + + +class TestGitHubRepoViewerTool: + def 
test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubRepoViewerTool() + assert tool.name == "repo_viewer" + assert tool.description == REPO_VIEWER_PROMPT + assert tool.parameters == REPO_VIEWER_SCHEMA + assert tool.max_file_size == 1_000_000 + assert tool.github_token is None + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure + assert tool.outputs_to_string == {"source": "documents", "handler": message_handler} + assert tool.inputs_from_state == {} + assert tool.outputs_to_state == {"documents": {"source": "documents"}} + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", + "data": { + "name": "repo_viewer", + "description": REPO_VIEWER_PROMPT, + "parameters": REPO_VIEWER_SCHEMA, + "github_token": None, + "repo": None, + "branch": "main", + "raise_on_failure": True, + "max_file_size": 1_000_000, + "outputs_to_string": { + "source": "documents", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + "inputs_from_state": {}, + "outputs_to_state": {"documents": {"source": "documents"}}, + }, + } + tool = GitHubRepoViewerTool.from_dict(tool_dict) + assert tool.name == "repo_viewer" + assert tool.description == REPO_VIEWER_PROMPT + assert tool.parameters == REPO_VIEWER_SCHEMA + assert tool.github_token is None + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure + assert tool.max_file_size == 1_000_000 + assert tool.outputs_to_string["source"] == "documents" + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {} + assert tool.outputs_to_state == {"documents": {"source": "documents"}} + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubRepoViewerTool() + tool_dict = tool.to_dict() + assert 
tool_dict["type"] == "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool" + assert tool_dict["data"]["name"] == "repo_viewer" + assert tool_dict["data"]["description"] == REPO_VIEWER_PROMPT + assert tool_dict["data"]["parameters"] == REPO_VIEWER_SCHEMA + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["repo"] is None + assert tool_dict["data"]["branch"] == "main" + assert tool_dict["data"]["raise_on_failure"] + assert tool_dict["data"]["max_file_size"] == 1_000_000 + assert tool_dict["data"]["outputs_to_string"] == { + "source": "documents", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + assert tool_dict["data"]["inputs_from_state"] == {} + assert tool_dict["data"]["outputs_to_state"] == {"documents": {"source": "documents"}} + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + tool = GitHubRepoViewerTool( + outputs_to_string={"source": "result", "handler": message_handler}, + inputs_from_state={"repo_state": "repo"}, + outputs_to_state={"file_content": {"source": "content", "handler": message_handler}}, + ) + + tool_dict = tool.to_dict() + assert tool_dict["data"]["outputs_to_string"] == { + "source": "result", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + assert tool_dict["data"]["inputs_from_state"] == {"repo_state": "repo"} + assert tool_dict["data"]["outputs_to_state"] == { + "file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + } + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + tool_dict = { + "type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", + "data": { + "name": "repo_viewer", + "description": REPO_VIEWER_PROMPT, + "parameters": REPO_VIEWER_SCHEMA, + "github_token": None, + "repo": None, + "branch": "main", + 
"raise_on_failure": True, + "max_file_size": 1_000_000, + "outputs_to_string": { + "source": "result", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + "inputs_from_state": {"repo_state": "repo"}, + "outputs_to_state": { + "file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + }, + }, + } + + tool = GitHubRepoViewerTool.from_dict(tool_dict) + assert tool.outputs_to_string["source"] == "result" + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repo_state": "repo"} + assert tool.outputs_to_state["file_content"]["source"] == "content" + assert tool.outputs_to_state["file_content"]["handler"] == message_handler