From 4aba2a96f7bd814dd27f5ff7566c1071c4cda1fd Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 10 Apr 2025 14:35:07 +0200 Subject: [PATCH 01/51] add agent_prompts and github_components --- integrations/github-haystack/LICENSE.txt | 73 +++++ integrations/github-haystack/README.md | 21 ++ integrations/github-haystack/pyproject.toml | 61 ++++ .../src/github_haystack/__about__.py | 4 + .../src/github_haystack/__init__.py | 3 + .../github_haystack/agent_prompts/__init__.py | 8 + .../agent_prompts/comment_tool.py | 22 ++ .../github_haystack/agent_prompts/context.py | 175 ++++++++++ .../agent_prompts/file_editor_tool.py | 130 ++++++++ .../agent_prompts/pr_system_prompt.py | 53 ++++ .../agent_prompts/repo_viewer_tool.py | 78 +++++ .../agent_prompts/system_prompt.py | 61 ++++ .../github_components/file_editor.py | 299 ++++++++++++++++++ .../github_components/issue_commenter.py | 155 +++++++++ .../github_components/issue_viewer.py | 218 +++++++++++++ .../github_components/pr_creator.py | 171 ++++++++++ .../github_components/repo_viewer.py | 263 +++++++++++++++ .../github_components/repository_forker.py | 298 +++++++++++++++++ .../github-haystack/tests/__init__.py | 3 + 19 files changed, 2096 insertions(+) create mode 100644 integrations/github-haystack/LICENSE.txt create mode 100644 integrations/github-haystack/README.md create mode 100644 integrations/github-haystack/pyproject.toml create mode 100644 integrations/github-haystack/src/github_haystack/__about__.py create mode 100644 integrations/github-haystack/src/github_haystack/__init__.py create mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py create mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py create mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/context.py create mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py create mode 100644 
integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py create mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py create mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py create mode 100644 integrations/github-haystack/src/github_haystack/github_components/file_editor.py create mode 100644 integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py create mode 100644 integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py create mode 100644 integrations/github-haystack/src/github_haystack/github_components/pr_creator.py create mode 100644 integrations/github-haystack/src/github_haystack/github_components/repo_viewer.py create mode 100644 integrations/github-haystack/src/github_haystack/github_components/repository_forker.py create mode 100644 integrations/github-haystack/tests/__init__.py diff --git a/integrations/github-haystack/LICENSE.txt b/integrations/github-haystack/LICENSE.txt new file mode 100644 index 0000000000..137069b823 --- /dev/null +++ b/integrations/github-haystack/LICENSE.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/integrations/github-haystack/README.md b/integrations/github-haystack/README.md new file mode 100644 index 0000000000..a816d2c6d0 --- /dev/null +++ b/integrations/github-haystack/README.md @@ -0,0 +1,21 @@ +# github-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/github-haystack.svg)](https://pypi.org/project/github-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/github-haystack.svg)](https://pypi.org/project/github-haystack) + +----- + +## Table of Contents + +- [Installation](#installation) +- [License](#license) + +## Installation + +```console +pip install github-haystack +``` + +## License + +`github-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
diff --git a/integrations/github-haystack/pyproject.toml b/integrations/github-haystack/pyproject.toml new file mode 100644 index 0000000000..539aa0c09a --- /dev/null +++ b/integrations/github-haystack/pyproject.toml @@ -0,0 +1,61 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "github-haystack" +dynamic = ["version"] +description = 'Haystack components for interacting with GitHub repositories' +readme = "README.md" +requires-python = ">=3.8" +license = "Apache-2.0" +keywords = [] +authors = [ + { name = "deepset GmbH", email = "info@deepset.ai" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/github-haystack#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/github-haystack" + +[tool.hatch.version] +path = "src/github_haystack/__about__.py" + +[tool.hatch.envs.types] +extra-dependencies = [ + "mypy>=1.0.0", +] +[tool.hatch.envs.types.scripts] +check = "mypy --install-types --non-interactive {args:src/github_haystack tests}" + +[tool.coverage.run] +source_pkgs = ["github_haystack", "tests"] +branch = true +parallel = true +omit = [ + "src/github_haystack/__about__.py", +] + +[tool.coverage.paths] +github_haystack = ["src/github_haystack", "*/github-haystack/src/github_haystack"] +tests = ["tests", "*/github-haystack/tests"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] diff --git 
a/integrations/github-haystack/src/github_haystack/__about__.py b/integrations/github-haystack/src/github_haystack/__about__.py new file mode 100644 index 0000000000..348b27fe81 --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/__about__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +__version__ = "0.0.1" diff --git a/integrations/github-haystack/src/github_haystack/__init__.py b/integrations/github-haystack/src/github_haystack/__init__.py new file mode 100644 index 0000000000..d391382c6b --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2025-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py b/integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py new file mode 100644 index 0000000000..984d948c61 --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from .repo_viewer_tool import repo_viewer_prompt, repo_viewer_schema +from .system_prompt import issue_prompt + +__all__ = ["issue_prompt", "repo_viewer_prompt", "repo_viewer_schema"] diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py b/integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py new file mode 100644 index 0000000000..27baf46e5c --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py @@ -0,0 +1,22 @@ +comment_prompt = """ +Haystack-Agent uses this tool to post a comment to a Github-issue discussion. + + +Pass a `comment` string to post a comment. + + +IMPORTANT +Haystack-Agent MUST pass "comment" to this tool. Otherwise, comment creation fails. 
+Haystack-Agent always passes the contents of the comment to the "comment" parameter when calling this tool. +""" + +comment_schema = { + "properties": { + "comment": { + "type": "string", + "description": "The contents of the comment that you want to create." + } + }, + "required": ["comment"], + "type": "object" +} diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/context.py b/integrations/github-haystack/src/github_haystack/agent_prompts/context.py new file mode 100644 index 0000000000..ff45fccde9 --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/agent_prompts/context.py @@ -0,0 +1,175 @@ +haystack_context_prompt = """ + +Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related +questions. +The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. +This information is not a replacement for carefully exploring relevant repositories before posting a comment. + +**Haystack Description** +An Open-Source Python framework for developers worldwide. +AI orchestration framework to build customizable, production-ready LLM applications. +Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. +With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or +conversational agent chatbots. + +**High-Level Architecture** +Haystack has two central abstractions: +- Components +- Pipelines + +A Component is a lightweight abstraction that gets inputs, performs an action and returns outputs. +Some example components: +- `OpenAIGenerator`: receives a prompt and generates replies to the prompt by calling an OpenAI-model +- `MetadataRouter`: routes documents to configurable outputs based on their metadata +- `BM25Retriever`: retrieves documents from a 'DocumentStore' based on the 'query'-input + +A component is lightweight. 
It is easy to implement custom components. Here is some information from the docs: + +Requirements + +Here are the requirements for all custom components: + +- `@component`: This decorator marks a class as a component, allowing it to be used in a pipeline. +- `run()`: This is a required method in every component. It accepts input arguments and returns a `dict`. The inputs can +either come from the pipeline when it’s executed, or from the output of another component when connected using +`connect()`. The `run()` method should be compatible with the input/output definitions declared for the component. +See an [Extended Example](#extended-example) below to check how it works. + +## Inputs and Outputs + +Next, define the inputs and outputs for your component. + +### Inputs + +You can choose between three input options: + +- `set_input_type`: This method defines or updates a single input socket for a component instance. It’s ideal for adding +or modifying a specific input at runtime without affecting others. Use this when you need to dynamically set or modify +a single input based on specific conditions. +- `set_input_types`: This method allows you to define multiple input sockets at once, replacing any existing inputs. +It’s useful when you know all the inputs the component will need and want to configure them in bulk. Use this when you +want to define multiple inputs during initialization. +- Declaring arguments directly in the `run()` method. Use this method when the component’s inputs are static and known +at the time of class definition. + +### Outputs + +You can choose between two output options: + +- `@component.output_types`: This decorator defines the output types and names at the time of class definition. The +output names and types must match the `dict` returned by the `run()` method. Use this when the output types are static +and known in advance. This decorator is cleaner and more readable for static components. 
+- `set_output_types`: This method defines or updates multiple output sockets for a component instance at runtime. +It’s useful when you need flexibility in configuring outputs dynamically. Use this when the output types need to be set +at runtime for greater flexibility. + +# Short Example + +Here is an example of a simple minimal component setup: + +```python +from haystack import component + +@component +class WelcomeTextGenerator: + ''' + A component generating personal welcome message and making it upper case + ''' + @component.output_types(welcome_text=str, note=str) + def run(self, name:str): + return {"welcome_text": f'Hello {name}, welcome to Haystack!'.upper(), "note": "welcome message is ready"} + +``` + +Here, the custom component `WelcomeTextGenerator` accepts one input: `name` string and returns two outputs: +`welcome_text` and `note`. + + +---------- + +**Pipelines** +The pipelines in Haystack 2.0 are directed multigraphs of different Haystack components and integrations. +They give you the freedom to connect these components in various ways. This means that the +pipeline doesn't need to be a continuous stream of information. With the flexibility of Haystack pipelines, +you can have simultaneous flows, standalone components, loops, and other types of connections. + +# Steps to Create a Pipeline Explained + +Once all your components are created and ready to be combined in a pipeline, there are four steps to make it work: + +1. Create the pipeline with `Pipeline()`. + This creates the Pipeline object. +2. Add components to the pipeline, one by one, with `.add_component(name, component)`. + This just adds components to the pipeline without connecting them yet. It's especially useful for loops as it allows + the smooth connection of the components in the next step because they all already exist in the pipeline. +3. Connect components with `.connect("producer_component.output_name", "consumer_component.input_name")`. 
+ At this step, you explicitly connect one of the outputs of a component to one of the inputs of the next component. + This is also when the pipeline validates the connection without running the components. It makes the validation fast. +4. Run the pipeline with `.run({"component_1": {"mandatory_inputs": value}})`. + Finally, you run the Pipeline by specifying the first component in the pipeline and passing its mandatory inputs. + + Optionally, you can pass inputs to other components, for example: + `.run({"component_1": {"mandatory_inputs": value}, "component_2": {"inputs": value}})`. + +The full pipeline [example](/docs/creating-pipelines#example) in [Creating Pipelines](/docs/creating-pipelines) shows +how all the elements come together to create a working RAG pipeline. + +Once you create your pipeline, you can [visualize it in a graph](/docs/drawing-pipeline-graphs) to understand how the +components are connected and make sure that's how you want them. You can use Mermaid graphs to do that. + +# Validation + +Validation happens when you connect pipeline components with `.connect()`, but before running the components to make it +faster. The pipeline validates that: + +- The components exist in the pipeline. +- The components' outputs and inputs match and are explicitly indicated. For example, if a component produces two +outputs, when connecting it to another component, you must indicate which output connects to which input. +- The components' types match. +- For input types other than `Variadic`, checks if the input is already occupied by another connection. + +All of these checks produce detailed errors to help you quickly fix any issues identified. + +# Serialization + +Thanks to serialization, you can save and then load your pipelines. Serialization is converting a Haystack pipeline +into a format you can store on disk or send over the wire. It's particularly useful for: + +- Editing, storing, and sharing pipelines. 
+- Modifying existing pipelines in a format different than Python. + +Haystack pipelines delegate the serialization to its components, so serializing a pipeline simply means serializing +each component in the pipeline one after the other, along with their connections. The pipeline is serialized into a +dictionary format, which acts as an intermediate format that you can then convert into the final format you want. + +> 📘 Serialization formats +> +> Haystack 2.0 only supports YAML format at this time. We'll be rolling out more formats gradually. + +For serialization to be possible, components must support conversion from and to Python dictionaries. All Haystack +components have two methods that make them serializable: `from_dict` and `to_dict`. The `Pipeline` class, in turn, has +its own `from_dict` and `to_dict` methods that take care of serializing components and connections. + + +--------- + +**Haystack Repositories** + +1. "deepset-ai/haystack" + +Contains the core code for the Haystack framework and a few components. +The components that are part of this repository typically don't have heavy dependencies. + + +2. "deepset-ai/haystack-core-integrations" + +This is a mono-repo maintained by the deepset-Team that contains integrations for the Haystack framework. +Typically, an integration consists of one or more components. Some integrations only contain document stores. +Each integration is a standalone pypi-package but you can find all of them in the core integrations repo. + + +3. "deepset-ai/haystack-experimental" + +Contains experimental features for the Haystack framework. 
+ +""" diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py b/integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py new file mode 100644 index 0000000000..61ac77b0d5 --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py @@ -0,0 +1,130 @@ +file_editor_prompt = """ +Use the file editor to edit, create, or delete files in the repository, or to undo your latest change. + +You must provide a 'command' for the action that you want to perform: +- edit +- create +- delete +- undo + +The 'payload' contains your options for each command. + +**Command 'edit'** + +To edit a file, you need to provide: +1. The path to the file +2. The original code snippet from the file +3. Your replacement code +4. A commit message + +The code will only be replaced if it is unique in the file. Pass a minimum of 2 consecutive lines that should +be replaced. If the original is not unique, the editor will return an error. +Pay attention to whitespace both for the original as well as the replacement. + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +Example: +{ + "command": "edit", + "payload": { + "path": "README.md", + "original": "This is a placeholder description!\\nIt should be updated.", + "replacement": "This project helps developers test AI applications.", + "message": "docs: README should mention project purpose." + } +} + + +**Command 'create'** + +To create a file, you need to provide: +1. The path for the new file +2. The content for the file +3. A commit message + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +IMPORTANT: +You MUST ALWAYS provide 'content' when creating a new file. File creation with empty content does not work. 
+ +Example: +{ + "command": "create", + "payload": { + "path": "CONTRIBUTING.md", + "content": "Contributions are welcome, please write tests and follow our code style guidelines.", + "message": "chore: minimal instructions for contributors" + } +} + + +**Command 'delete'** + +To delete a file, you need to provide: +1. The path to the file to delete +2. A commit message + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +Example: +{ + "command": "delete", + "payload": { + "path": "tests/components/test_messaging", + "message": "chore: messaging feature was removed" + } +} + +**Command 'undo'** + +This is how to undo your latest change. + +Important notes: +- You can only undo your own changes +- You can only undo one change at a time +- You need to provide a message for the undo operation + +Example: +{ + "command": "undo", + "payload": { + "message": "revert: undo previous commit due to failing tests" + } +} +""" + +file_editor_schema = { + "type": "object", + "properties": { + "command": { + "type": "string", + "enum": ["edit", "create", "delete", "undo"], + "description": "The command to execute" + }, + "payload": { + "type": "object", + "required": ["message"], + "properties": { + "message": { + "type": "string" + }, + "content": { + "type": "string" + }, + "path": { + "type": "string" + }, + "original": { + "type": "string" + }, + "replacement": { + "type": "string" + } + } + } + }, + "required": ["command", "payload"] +} diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py b/integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py new file mode 100644 index 0000000000..a99c8ae100 --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py @@ -0,0 +1,53 @@ +system_prompt = """ +The assistant is Haystack-Agent, created by deepset. 
+Haystack-Agent creates Pull Requests that resolve GitHub issues. + +Haystack-Agent receives a GitHub issue and all current comments. +Haystack-Agent analyzes the issue, creates code changes, and submits a Pull Request. + +**Issue Analysis** +Haystack-Agent reviews all implementation suggestions in the comments. +Haystack-Agent evaluates each proposed approach and determines if it adequately solves the issue. +Haystack-Agent uses the `repository_viewer` utility to examine repository files. +Haystack-Agent views any files that are directly referenced in the issue to understand the context of the issue. +Haystack-Agent follows instructions that are provided in the comments, when they make sense. + +**Software Engineering** +Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. +Haystack-Agent finds the right level of abstraction and complexity. +When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and +documentation patterns that are already being used in the codebase. +Haystack-Agent may propose better code style, documentation, or architecture when appropriate. +Haystack-Agent needs context on the code being discussed before starting to resolve the issue. +Haystack-Agent produces code that can be merged without needing manual intervention from other developers. +Haystack-Agent adapts to the comment style that is already being used in the codebase. +It avoids superfluous comments that point out the obvious. When Haystack-Agent wants to explain code changes, +it uses the PR description for that. + +**Thinking Process** +Haystack-Agent thinks thoroughly about each issue. +Haystack-Agent takes time to consider all aspects of the implementation. +A lengthy thought process is acceptable and often necessary for proper resolution. + + +Haystack-Agent notes down any thoughts and observations in the scratchpad, so that it can reference them later. 
+ + +**Resolution Process** +Haystack-Agent follows these steps to resolve issues: + +1. Analyze the issue and comments, noting all proposed implementations +2. Explore the repository from the root (/) directory +3. Examine files referenced in the issue or comments +4. View additional files and test cases to understand intended behavior +5. Create initial test cases to validate the planned solution +6. Edit repository source code to resolve the issue +7. Update test cases to match code changes +8. Handle edge cases and ensure code matches repository style +9. Create a Pull Request using the `create_pr` utility + +**Pull Request Creation** +Haystack-Agent writes clear Pull Request descriptions. +Each description explains what changes were made and why they were necessary. +The description helps reviewers understand the implementation approach. +""" diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py b/integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py new file mode 100644 index 0000000000..df9cad616b --- /dev/null +++ b/integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py @@ -0,0 +1,78 @@ +repo_viewer_prompt = """ +Haystack-Agent uses this tool to browse GitHub repositories. +Haystack-Agent can view directories and files with this tool. + + +Pass a `repo` string for the repository that you want to view. +It is required to pass `repo` to use this tool. +The structure is "owner/repo-name". + +Pass a `path` string for the directory or file that you want to view. +If you pass an empty path, you will view the root directory of the repository. 
+ +Examples: + +- {"repo": "pandas-dev/pandas", "path": ""} + - will show you the root of the pandas repository +- {"repo": "pandas-dev/pandas", "path": "pyproject.toml"} + - will show you the "pyproject.toml"-file of the pandas repository +- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert"} + - will show you the "albert"-directory in the transformers repository +- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert/albert_modelling.py"} + - will show you the source code for the albert model in the transformers repository + + +Haystack-Agent uses the `github_repository_viewer` to view relevant code. +Haystack-Agent starts at the root of the repository. +Haystack-Agent navigates one level at a time using directory listings. +Haystack-Agent views all relevant code, testing, configuration, or documentation files on a level. +It never skips a directory level or guesses full paths. + +Haystack-Agent thinks deeply about the content of a repository. Before Haystack-Agent uses the tool, it reasons about +next steps: + + +- What am I looking for in this location? +- Why is this path potentially relevant? +- What specific files might help solve the issue? +- What patterns or implementations should I look for? + + +After viewing the contents of a file or directory, Haystack-Agent reflects on its observations before moving on: + +- What did I learn from these files? +- What else might be related? +- Where should I look next and why? + + +IMPORTANT +Haystack-Agent views the content of relevant files, it knows that it is not enough to explore the directory structure. +Haystack-Agent needs to read the code to understand it properly. +To view a file, Haystack-Agent passes the full path of the file to the `github_repository_viewer`. +Haystack-Agent never guesses a file or directory path. 
+
+Haystack-Agent takes notes after viewing code:
+
+- extract important code snippets
+- document key functions, classes or configurations
+- note key architecture patterns
+- relate findings to the original issue
+- relate findings to other code that was already viewed
+- note down file paths as a reference
+
+"""
+
+# JSON schema describing the arguments of the repository viewer tool.
+# `repo` ("owner/repo-name") is the only required argument; `path` is optional.
+repo_viewer_schema = {
+    "properties": {
+        "repo": {
+            "type": "string",
+            "description": "The owner/repository_name that you want to view."
+        },
+        "path": {
+            "type": "string",
+            "description": "Path to directory or file to view. Defaults to repository root.",
+        }
+    },
+    "required": ["repo"],
+    "type": "object"
+}
diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py b/integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py
new file mode 100644
index 0000000000..14b612a08e
--- /dev/null
+++ b/integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py
@@ -0,0 +1,61 @@
+issue_prompt = """
+The assistant is Haystack-Agent, created by deepset.
+Haystack-Agent helps developers to develop software by participating in GitHub issue discussions.
+
+Haystack-Agent receives a GitHub issue and all current comments.
+Haystack-Agent participates in the discussion by:
+- helping users find answers to their questions
+- analyzing bug reports and proposing a fix when necessary
+- analyzing feature requests and proposing an implementation
+- being a sounding board in architecture discussions and proposing alternative solutions
+
+**Style**
+Haystack-Agent uses Markdown formatting. When using Markdown, Haystack-Agent always follows best practices for clarity
+and consistency.
+It always uses a single space after hash symbols for headers (e.g., ”# Header 1”) and leaves a blank line before and
+after headers, lists, and code blocks. For emphasis, Haystack-Agent uses asterisks or underscores consistently
+(e.g., italic or bold).
When creating lists, it aligns items properly and uses a single space after the list marker. +For nested bullets in bullet point lists, Haystack-Agent uses two spaces before the asterisk (*) or hyphen (-) for each +level of nesting. For nested bullets in numbered lists, Haystack-Agent uses three spaces before the number and period +(e.g., “1.”) for each level of nesting. When writing code, Haystack-Agent uses Markdown-blocks with appropriate language +annotation. + +**Software Engineering** +Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. +Haystack-Agent finds the right level of abstraction and complexity. +When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and +documentation patterns that are already being used in the codebase. +Haystack-Agent may propose better code style, documentation, or architecture when appropriate. +Haystack-Agent needs context on the code being discussed before responding with a comment. +Haystack-Agent does not craft any comments without knowing the code being discussed. +Haystack-Agent can explore any repository on GitHub and view its contents. + +**Exploring Repositories** +Haystack-Agent uses the `repository_viewer` to explore GitHub repositories before crafting a comment. +Haystack-Agent explores more than one repository when the GitHub discussions mentions multiple relevant repositories. + +**Thinking** +Haystack-Agent is a rigorous thinker. It uses -blocks to gather thoughts, reflect on the issue at +hand, and relate its learnings to it. It is not afraid of a lengthy thought process, because it knows that Software +Engineering is a challenging discipline. +Haystack-Agent takes notes on the . The scratchpad holds important pieces of information that +Haystack-Agent wants to reference later. + +**Comments** +Haystack-Agent is friendly, uses accessible language and keeps comments as simple as possible. 
+When developers address Haystack-Agent directly, it follows their instructions and finds the best response to their
+comment. Haystack-Agent is happy to revise its code when a developer asks for it.
+Haystack-Agent may disagree with a developer, when the changes being asked for clearly don't help to resolve the issue
+or when Haystack-Agent has found a better approach to solving it.
+Haystack-Agent uses the `create_comment`-tool to create a comment. Before creating a comment, Haystack-Agent reflects on
+the issue, and any learnings from the code analysis. Haystack-Agent only responds when ready.
+
+
+Haystack-Agent, this is IMPORTANT:
+- DO NOT START WRITING YOUR RESPONSE UNTIL YOU HAVE COMPLETED THE ENTIRE EXPLORATION PHASE
+- VIEWING DIRECTORY LISTINGS IS NOT ENOUGH - YOU MUST EXAMINE FILE CONTENTS
+- If you find yourself running out of context space during exploration, say: "I need to continue exploring the codebase
+before providing a complete response." Then continue exploration in the next interaction.
+
+
+Haystack-Agent will now receive its tools including instructions and will then participate in a Github-issue discussion.
+"""
diff --git a/integrations/github-haystack/src/github_haystack/github_components/file_editor.py b/integrations/github-haystack/src/github_haystack/github_components/file_editor.py
new file mode 100644
index 0000000000..cfacfa213e
--- /dev/null
+++ b/integrations/github-haystack/src/github_haystack/github_components/file_editor.py
@@ -0,0 +1,299 @@
+from base64 import b64decode, b64encode
+from enum import StrEnum
+from typing import Any, Dict, Optional, Union
+
+import requests
+from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.utils import Secret, deserialize_secrets_inplace
+
+logger = logging.getLogger(__name__)
+
+
+class Command(StrEnum):
+    """
+    Available commands for file operations in GitHub.
+
+    Attributes:
+        EDIT: Edit an existing file by replacing content
+        UNDO: Revert the last commit if made by the same user
+        CREATE: Create a new file
+        DELETE: Delete an existing file
+    """
+    EDIT = "edit"
+    UNDO = "undo"
+    CREATE = "create"
+    DELETE = "delete"
+
+
+@component
+class GithubFileEditor:
+    """
+    A Haystack component for editing files in GitHub repositories.
+
+    Supports editing, undoing changes, deleting files, and creating new files
+    through the GitHub API.
+
+    ### Usage example
+    ```python
+    from github_haystack.github_components.file_editor import Command, GithubFileEditor
+    from haystack.utils import Secret
+
+    # Initialize with default repo and branch
+    editor = GithubFileEditor(
+        github_token=Secret.from_env_var("GITHUB_TOKEN"),
+        repo="owner/repo",
+        branch="main"
+    )
+
+    # Edit a file using default repo and branch
+    result = editor.run(
+        command=Command.EDIT,
+        payload={
+            "path": "path/to/file.py",
+            "original": "def old_function():",
+            "replacement": "def new_function():",
+            "message": "Renamed function for clarity"
+        }
+    )
+
+    # Edit a file in a different repo/branch
+    result = editor.run(
+        command=Command.EDIT,
+        repo="other-owner/other-repo",  # Override default repo
+        branch="feature",  # Override default branch
+        payload={
+            "path": "path/to/file.py",
+            "original": "def old_function():",
+            "replacement": "def new_function():",
+            "message": "Renamed function for clarity"
+        }
+    )
+    ```
+    """
+
+    def __init__(
+        self,
+        github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"),
+        repo: Optional[str] = None,
+        branch: str = "main",
+        raise_on_failure: bool = True
+    ):
+        """
+        Initialize the component.
+
+        :param github_token: GitHub personal access token for API authentication
+        :param repo: Default repository in owner/repo format
+        :param branch: Default branch to work with
+        :param raise_on_failure: If True, raises exceptions on API errors
+        :raises TypeError: If `github_token` is not a `Secret`
+        """
+        if not isinstance(github_token, Secret):
+            raise TypeError("github_token must be a Secret")
+
+        self.github_token = github_token
+        self.default_repo = repo
+        self.default_branch = branch
+        self.raise_on_failure = raise_on_failure
+
+        # The token is resolved once, here, and kept in the request headers for the component's lifetime.
+        self.headers = {
+            "Accept": "application/vnd.github.v3+json",
+            "Authorization": f"Bearer {self.github_token.resolve_value()}",
+            "User-Agent": "Haystack/GithubFileEditor"
+        }
+
+    def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]:
+        """
+        Get file content and SHA from GitHub.
+
+        :return: Tuple of (UTF-8 decoded file content, blob SHA of the file on `branch`)
+        :raises requests.RequestException: If the API request fails
+        """
+        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
+        response = requests.get(url, headers=self.headers, params={"ref": branch})
+        response.raise_for_status()
+        data = response.json()
+        content = b64decode(data["content"]).decode("utf-8")
+        return content, data["sha"]
+
+    def _update_file(
+        self,
+        owner: str,
+        repo: str,
+        path: str,
+        content: str,
+        message: str,
+        sha: str,
+        branch: str
+    ) -> bool:
+        """
+        Update file content on GitHub.
+
+        :param sha: Blob SHA of the file version being replaced, as returned by `_get_file_content`
+        :return: True once the API accepted the update
+        :raises requests.RequestException: If the API request fails
+        """
+        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
+        payload = {
+            "message": message,
+            "content": b64encode(content.encode("utf-8")).decode("utf-8"),
+            "sha": sha,
+            "branch": branch
+        }
+        response = requests.put(url, headers=self.headers, json=payload)
+        response.raise_for_status()
+        return True
+
+    def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool:
+        """
+        Check if last commit was made by the current token user.
+
+        Returns False when the branch has no commits or when the latest commit carries no GitHub
+        account (the API reports `author` as null in that case), instead of failing on the lookup.
+
+        :raises requests.RequestException: If one of the API requests fails
+        """
+        url = f"https://api.github.com/repos/{owner}/{repo}/commits"
+        response = requests.get(url, headers=self.headers, params={"per_page": 1, "sha": branch})
+        response.raise_for_status()
+        commits = response.json()
+        if not commits:
+            return False
+
+        commit_author = (commits[0].get("author") or {}).get("login")
+        if commit_author is None:
+            return False
+
+        # Get current user
+        user_response = requests.get("https://api.github.com/user", headers=self.headers)
+        user_response.raise_for_status()
+        current_user = user_response.json()["login"]
+
+        return commit_author == current_user
+
+    def _edit_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
+        """
+        Handle file editing.
+
+        Replaces `payload["original"]` with `payload["replacement"]` in the file at `payload["path"]`.
+        The edit is refused unless the original string occurs exactly once in the file.
+
+        :return: Human-readable result message
+        """
+        try:
+            content, sha = self._get_file_content(owner, repo, payload["path"], branch)
+
+            # Check if original string is unique
+            occurrences = content.count(payload["original"])
+            if occurrences == 0:
+                return "Error: Original string not found in file"
+            if occurrences > 1:
+                return "Error: Original string appears multiple times. Please provide more context"
+
+            # Perform the replacement
+            new_content = content.replace(payload["original"], payload["replacement"])
+            success = self._update_file(
+                owner, repo, payload["path"], new_content, payload["message"], sha, branch
+            )
+            return "Edit successful" if success else "Edit failed"
+
+        except requests.RequestException as e:
+            if self.raise_on_failure:
+                raise
+            return f"Error: {str(e)}"
+
+    def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
+        """
+        Handle undoing changes.
+
+        Force-updates the branch reference to the second entry of the branch's commit listing, which
+        drops the latest commit from the branch. `payload` is accepted for a uniform handler signature
+        but is not used: no new commit is created, so its message is not recorded anywhere.
+
+        :return: Human-readable result message
+        """
+        try:
+            if not self._check_last_commit(owner, repo, branch):
+                return "Error: Last commit was not made by the current user"
+
+            # Get the previous commit SHA
+            commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits"
+            commits_response = requests.get(
+                commits_url,
+                headers=self.headers,
+                params={"per_page": 2, "sha": branch}
+            )
+            commits_response.raise_for_status()
+            commits = commits_response.json()
+            if len(commits) < 2:
+                return "Error: No previous commit to revert to"
+            previous_sha = commits[1]["sha"]
+
+            # Update branch reference to previous commit
+            ref_url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch}"
+            ref_update = {"sha": previous_sha, "force": True}
+            response = requests.patch(ref_url, headers=self.headers, json=ref_update)
+            response.raise_for_status()
+
+            return "Successfully undid last change"
+
+        except requests.RequestException as e:
+            if self.raise_on_failure:
+                raise
+            return f"Error: {str(e)}"
+
+    def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
+        """
+        Handle file creation.
+
+        Creates `payload["path"]` with `payload["content"]` on `branch`, using `payload["message"]`
+        as the commit message.
+
+        :return: Human-readable result message
+        """
+        try:
+            url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}"
+            content = b64encode(payload["content"].encode("utf-8")).decode("utf-8")
+
+            data = {
+                "message": payload["message"],
+                "content": content,
+                "branch": branch
+            }
+
+            response = requests.put(url, headers=self.headers, json=data)
+            response.raise_for_status()
+            return "File created successfully"
+
+        except requests.RequestException as e:
+            if self.raise_on_failure:
+                raise
+            return f"Error: {str(e)}"
+
+    def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
+        """
+        Handle file deletion.
+
+        Deletes `payload["path"]` on `branch`, using `payload["message"]` as the commit message.
+
+        :return: Human-readable result message
+        """
+        try:
+            # Only the blob SHA is needed to address the file version that gets deleted.
+            _, sha = self._get_file_content(owner, repo, payload["path"], branch)
+            url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}"
+
+            data = {
+                "message": payload["message"],
+                "sha": sha,
+                "branch": branch
+            }
+
+            response = requests.delete(url, headers=self.headers, json=data)
+            response.raise_for_status()
+            return "File deleted successfully"
+
+        except requests.RequestException as e:
+            if self.raise_on_failure:
+                raise
+            return f"Error: {str(e)}"
+
+    @component.output_types(result=str)
+    def run(
+        self,
+        command: Union[Command, str],
+        payload: Dict[str, Any],
+        repo: Optional[str] = None,
+        branch: Optional[str] = None
+    ) -> Dict[str, str]:
+        """
+        Process GitHub file operations.
+
+        Invalid input (no repository, malformed repository name, unknown command, missing payload
+        fields) is reported through the returned result string rather than raised.
+
+        :param command: Operation to perform ("edit", "undo", "create", "delete")
+        :param payload: Dictionary containing command-specific parameters
+        :param repo: Repository in owner/repo format (overrides default if provided)
+        :param branch: Branch to perform operations on (overrides default if provided)
+        :return: Dictionary containing operation result
+        """
+        if repo is None:
+            if self.default_repo is None:
+                return {
+                    "result": "Error: No repository specified. Either provide it in initialization or in run() method"
+                }
+            repo = self.default_repo
+
+        working_branch = branch if branch is not None else self.default_branch
+
+        owner, _, repo_name = repo.partition("/")
+        if not owner or not repo_name or "/" in repo_name:
+            return {"result": f"Error: Invalid repository '{repo}'. Expected the format 'owner/repo'"}
+
+        # Each command maps to its handler and to the payload keys that handler reads.
+        command_handlers = {
+            Command.EDIT: (self._edit_file, ("path", "original", "replacement", "message")),
+            Command.UNDO: (self._undo_changes, ()),
+            Command.CREATE: (self._create_file, ("path", "content", "message")),
+            Command.DELETE: (self._delete_file, ("path", "message")),
+        }
+
+        try:
+            command = Command(command)
+        except ValueError:
+            return {"result": f"Error: Unknown command '{command}'"}
+
+        handler, required_keys = command_handlers[command]
+        missing_keys = [key for key in required_keys if key not in payload]
+        if missing_keys:
+            return {"result": f"Error: Missing payload field(s) for '{command.value}': {', '.join(missing_keys)}"}
+
+        result = handler(owner, repo_name, payload, working_branch)
+        return {"result": result}
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize the component to a dictionary."""
+        return default_to_dict(
+            self,
+            github_token=self.github_token.to_dict() if self.github_token else None,
+            repo=self.default_repo,
+            branch=self.default_branch,
+            raise_on_failure=self.raise_on_failure
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GithubFileEditor":
+        """Deserialize the component from a dictionary."""
+        init_params = data["init_parameters"]
+        deserialize_secrets_inplace(init_params, keys=["github_token"])
+        return default_from_dict(cls, data)
+
diff --git a/integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py b/integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py
new file mode 100644
index 0000000000..f564eb4e7a
--- /dev/null
+++ b/integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py
@@ -0,0 +1,155 @@
+import
re
+from typing import Any, Dict, Optional
+
+import requests
+from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.utils import deserialize_secrets_inplace
+from haystack.utils.auth import Secret
+
+logger = logging.getLogger(__name__)
+
+
+@component
+class GithubIssueCommenter:
+    """
+    Posts comments to GitHub issues.
+
+    The component takes a GitHub issue URL and comment text, then posts the comment
+    to the specified issue using the GitHub API.
+
+    ### Usage example
+    ```python
+    from github_haystack.github_components.issue_commenter import GithubIssueCommenter
+    from haystack.utils import Secret
+
+    commenter = GithubIssueCommenter(github_token=Secret.from_env_var("GITHUB_TOKEN"))
+    result = commenter.run(
+        url="https://github.com/owner/repo/issues/123",
+        comment="Thanks for reporting this issue! We'll look into it."
+    )
+
+    assert result["success"] is True
+    ```
+    """
+
+    def __init__(
+        self,
+        github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"),
+        raise_on_failure: bool = True,
+        retry_attempts: int = 2,
+    ):
+        """
+        Initialize the component.
+
+        :param github_token: GitHub personal access token for API authentication as a Secret
+        :param raise_on_failure: If True, raises exceptions on API errors
+        :param retry_attempts: Total number of times the request is tried before giving up
+            (values below 1 are treated as 1)
+        """
+        self.github_token = github_token
+        self.raise_on_failure = raise_on_failure
+        self.retry_attempts = retry_attempts
+
+        # Set base headers during initialization
+        self.headers = {
+            "Accept": "application/vnd.github.v3+json",
+            "User-Agent": "Haystack/GithubIssueCommenter",
+        }
+
+    def _get_request_headers(self) -> dict:
+        """
+        Get headers with resolved token for the request.
+
+        :return: Dictionary of headers including authorization if token is present
+        """
+        headers = self.headers.copy()
+        if self.github_token is not None:
+            headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}"
+        return headers
+
+    def _parse_github_url(self, url: str) -> tuple[str, str, int]:
+        """
+        Parse GitHub URL into owner, repo and issue number.
+
+        :param url: GitHub issue URL
+        :return: Tuple of (owner, repo, issue_number)
+        :raises ValueError: If URL format is invalid
+        """
+        pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)"
+        match = re.match(pattern, url)
+        if not match:
+            raise ValueError(f"Invalid GitHub issue URL format: {url}")
+
+        owner, repo, issue_number = match.groups()
+        return owner, repo, int(issue_number)
+
+    def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) -> bool:
+        """
+        Post a comment to a GitHub issue.
+
+        The request is tried up to `retry_attempts` times; the error of the last failed attempt is re-raised.
+
+        :param owner: Repository owner
+        :param repo: Repository name
+        :param issue_number: Issue number
+        :param comment: Comment text to post
+        :return: True if comment was posted successfully
+        :raises requests.exceptions.RequestException: If the API request fails
+        """
+        url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/comments"
+        data = {"body": comment}
+
+        # Always send at least one request: with `retry_attempts <= 0` the loop would otherwise be
+        # skipped and the method would report a failure without ever contacting the API.
+        attempts = max(1, self.retry_attempts)
+        for attempt in range(attempts):
+            try:
+                response = requests.post(url, headers=self._get_request_headers(), json=data)
+                response.raise_for_status()
+                return True
+            except requests.exceptions.RequestException as e:
+                if attempt == attempts - 1:
+                    raise
+                logger.warning(f"Attempt {attempt + 1} failed: {str(e)}. Retrying...")
+
+        # Not reached: the final attempt either returns or re-raises. Kept so every path returns a bool.
+        return False
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serialize the component to a dictionary.
+
+        :returns: Dictionary with serialized data.
+        """
+        return default_to_dict(
+            self,
+            github_token=self.github_token.to_dict() if self.github_token else None,
+            raise_on_failure=self.raise_on_failure,
+            retry_attempts=self.retry_attempts,
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GithubIssueCommenter":
+        """
+        Deserialize the component from a dictionary.
+
+        :param data: Dictionary to deserialize from.
+        :returns: Deserialized component.
+        """
+        init_params = data["init_parameters"]
+        deserialize_secrets_inplace(init_params, keys=["github_token"])
+        return default_from_dict(cls, data)
+
+    @component.output_types(success=bool)
+    def run(self, url: str, comment: str) -> dict:
+        """
+        Post a comment to a GitHub issue.
+
+        With `raise_on_failure=False`, any error (including an invalid URL) is logged as a warning
+        and reported as `{"success": False}`.
+
+        :param url: GitHub issue URL
+        :param comment: Comment text to post
+        :return: Dictionary containing success status
+        """
+        try:
+            owner, repo, issue_number = self._parse_github_url(url)
+            success = self._post_comment(owner, repo, issue_number, comment)
+            return {"success": success}
+
+        except Exception as e:
+            if self.raise_on_failure:
+                raise
+
+            error_message = f"Error posting comment to GitHub issue {url}: {str(e)}"
+            logger.warning(error_message)
+            return {"success": False}
diff --git a/integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py b/integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py
new file mode 100644
index 0000000000..89fdaea771
--- /dev/null
+++ b/integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py
@@ -0,0 +1,218 @@
+import re
+from typing import Any, Dict, List, Optional
+
+import requests
+from haystack import Document, component, default_from_dict, default_to_dict, logging
+from haystack.utils import deserialize_secrets_inplace
+from haystack.utils.auth import Secret
+
+logger = logging.getLogger(__name__)
+
+
+@component
+class GithubIssueViewer:
+    """
+    Fetches and parses GitHub issues into Haystack documents.
+
+    The component takes a GitHub issue URL and returns a list of documents where:
+    - First document contains the main issue content
+    - Subsequent documents contain the issue comments
+
+    ### Usage example
+    ```python
+    from github_haystack.github_components.issue_viewer import GithubIssueViewer
+    from haystack.utils import Secret
+
+    viewer = GithubIssueViewer(github_token=Secret.from_env_var("GITHUB_TOKEN"))
+    docs = viewer.run(
+        url="https://github.com/owner/repo/issues/123"
+    )["documents"]
+
+    assert len(docs) >= 1  # At least the main issue
+    assert docs[0].meta["type"] == "issue"
+    ```
+    """
+
+    def __init__(
+        self,
+        github_token: Optional[Secret] = None,
+        raise_on_failure: bool = True,
+        retry_attempts: int = 2,
+    ):
+        """
+        Initialize the component.
+
+        :param github_token: GitHub personal access token for API authentication as a Secret
+        :param raise_on_failure: If True, raises exceptions on API errors
+        :param retry_attempts: Number of retry attempts for failed requests. The value is stored and
+            serialized, but the fetch helpers of this component issue each request only once.
+        """
+        self.github_token = github_token
+        self.raise_on_failure = raise_on_failure
+        self.retry_attempts = retry_attempts
+
+        # Only set the basic headers during initialization
+        self.headers = {
+            "Accept": "application/vnd.github.v3+json",
+            "User-Agent": "Haystack/GithubIssueViewer",
+        }
+
+    def _get_request_headers(self) -> dict:
+        """
+        Get headers with resolved token for the request.
+
+        :return: Dictionary of headers including authorization if token is present
+        """
+        headers = self.headers.copy()
+        if self.github_token:
+            headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}"
+        return headers
+
+    def _parse_github_url(self, url: str) -> tuple[str, str, int]:
+        """
+        Parse GitHub URL into owner, repo and issue number.
+
+        :param url: GitHub issue URL
+        :return: Tuple of (owner, repo, issue_number)
+        :raises ValueError: If URL format is invalid
+        """
+        pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)"
+        match = re.match(pattern, url)
+        if not match:
+            raise ValueError(f"Invalid GitHub issue URL format: {url}")
+
+        owner, repo, issue_number = match.groups()
+        return owner, repo, int(issue_number)
+
+    def _fetch_issue(self, owner: str, repo: str, issue_number: int) -> Any:
+        """
+        Fetch issue data from GitHub API.
+
+        :param owner: Repository owner
+        :param repo: Repository name
+        :param issue_number: Issue number
+        :return: Issue data dictionary
+        :raises requests.exceptions.RequestException: If the API request fails
+        """
+        url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}"
+        response = requests.get(url, headers=self._get_request_headers())
+        response.raise_for_status()
+        return response.json()
+
+    def _fetch_comments(self, comments_url: str) -> Any:
+        """
+        Fetch issue comments from GitHub API.
+
+        The comments endpoint is paginated, so every page is requested in turn by following the
+        `next` relation of the `Link` response header until no further page is advertised.
+
+        :param comments_url: URL for issue comments
+        :return: List of comment dictionaries
+        :raises requests.exceptions.RequestException: If one of the API requests fails
+        """
+        comments: List[Any] = []
+        next_url: Optional[str] = comments_url
+        # Ask for the largest page size on the first request; the `next` links returned by the API
+        # already carry their own paging query parameters.
+        params: Optional[Dict[str, int]] = {"per_page": 100}
+
+        while next_url:
+            response = requests.get(next_url, headers=self._get_request_headers(), params=params)
+            response.raise_for_status()
+            comments.extend(response.json())
+            next_url = response.links.get("next", {}).get("url")
+            params = None
+
+        return comments
+
+    def _create_issue_document(self, issue_data: dict) -> Document:
+        """
+        Create a Document from issue data.
+
+        :param issue_data: Issue data from GitHub API
+        :return: Haystack Document
+        """
+        return Document(  # type: ignore
+            content=issue_data["body"],
+            meta={
+                "type": "issue",
+                "title": issue_data["title"],
+                "number": issue_data["number"],
+                "state": issue_data["state"],
+                "created_at": issue_data["created_at"],
+                "updated_at": issue_data["updated_at"],
+                "author": issue_data["user"]["login"],
+                "url": issue_data["html_url"],
+            },
+        )
+
+    def _create_comment_document(
+        self, comment_data: dict, issue_number: int
+    ) -> Document:
+        """
+        Create a Document from comment data.
+
+        :param comment_data: Comment data from GitHub API
+        :param issue_number: Parent issue number
+        :return: Haystack Document
+        """
+        return Document(
+            content=comment_data["body"],
+            meta={
+                "type": "comment",
+                "issue_number": issue_number,
+                "created_at": comment_data["created_at"],
+                "updated_at": comment_data["updated_at"],
+                "author": comment_data["user"]["login"],
+                "url": comment_data["html_url"],
+            },
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serialize the component to a dictionary.
+
+        :returns: Dictionary with serialized data.
+        """
+        return default_to_dict(
+            self,
+            github_token=self.github_token.to_dict() if self.github_token else None,
+            raise_on_failure=self.raise_on_failure,
+            retry_attempts=self.retry_attempts,
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GithubIssueViewer":
+        """
+        Deserialize the component from a dictionary.
+
+        :param data: Dictionary to deserialize from.
+        :returns: Deserialized component.
+        """
+        init_params = data["init_parameters"]
+        deserialize_secrets_inplace(init_params, keys=["github_token"])
+        return default_from_dict(cls, data)
+
+    @component.output_types(documents=List[Document])
+    def run(self, url: str) -> dict:
+        """
+        Process a GitHub issue URL and return documents.
+
+        With `raise_on_failure=False`, any error is logged as a warning and returned as a single
+        document whose meta has `"type": "error"`.
+
+        :param url: GitHub issue URL
+        :return: Dictionary containing list of documents
+        """
+        try:
+            owner, repo, issue_number = self._parse_github_url(url)
+
+            # Fetch issue data
+            issue_data = self._fetch_issue(owner, repo, issue_number)
+            documents = [self._create_issue_document(issue_data)]
+
+            # Fetch and process comments if they exist
+            if issue_data["comments"] > 0:
+                comments = self._fetch_comments(issue_data["comments_url"])
+                documents.extend(
+                    self._create_comment_document(comment, issue_number)
+                    for comment in comments
+                )
+
+            return {"documents": documents}
+
+        except Exception as e:
+            if self.raise_on_failure:
+                raise
+
+            error_message = f"Error processing GitHub issue {url}: {str(e)}"
+            logger.warning(error_message)
+            error_doc = Document(
+                content=error_message,
+                meta={
+                    "error": True,
+                    "type": "error",
+                    "url": url,
+                }
+            )
+            return {"documents": [error_doc]}
+
diff --git a/integrations/github-haystack/src/github_haystack/github_components/pr_creator.py b/integrations/github-haystack/src/github_haystack/github_components/pr_creator.py
new file mode 100644
index 0000000000..d27d8cb064
--- /dev/null
+++ b/integrations/github-haystack/src/github_haystack/github_components/pr_creator.py
@@ -0,0 +1,171 @@
+import re
+from typing import Any, Dict
+
+import requests
+from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.utils import Secret, deserialize_secrets_inplace
+
+logger = logging.getLogger(__name__)
+
+
+@component
+class GithubPRCreator:
+    """
+    A Haystack component for creating pull requests from a fork back to the original repository.
+
+    Uses the authenticated user's fork to create the PR and links it to an existing issue.
+
+    ### Usage example
+    ```python
+    from github_haystack.github_components.pr_creator import GithubPRCreator
+    from haystack.utils import Secret
+
+    pr_creator = GithubPRCreator(
+        github_token=Secret.from_env_var("GITHUB_TOKEN")  # Token from the fork owner
+    )
+
+    # Create a PR from your fork
+    result = pr_creator.run(
+        issue_url="https://github.com/owner/repo/issues/123",
+        title="Fix issue #123",
+        body="This PR addresses issue #123",
+        branch="feature-branch",  # The branch in your fork with the changes
+        base="main"  # The branch in the original repo to merge into
+    )
+    ```
+    """
+
+    def __init__(
+        self,
+        github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"),
+        raise_on_failure: bool = True
+    ):
+        """
+        Initialize the component.
+
+        :param github_token: GitHub personal access token for authentication (from the fork owner)
+        :param raise_on_failure: If True, raises exceptions on API errors
+        :raises TypeError: If `github_token` is not a `Secret`
+        """
+        if not isinstance(github_token, Secret):
+            raise TypeError("github_token must be a Secret")
+
+        self.github_token = github_token
+        self.raise_on_failure = raise_on_failure
+
+    def _get_headers(self) -> Dict[str, str]:
+        """
+        Get headers for GitHub API requests with resolved token.
+
+        :return: Dictionary of request headers
+        """
+        return {
+            "Accept": "application/vnd.github.v3+json",
+            "Authorization": f"Bearer {self.github_token.resolve_value()}",
+            "User-Agent": "Haystack/GithubPRCreator"
+        }
+
+    def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]:
+        """
+        Parse owner, repo name, and issue number from GitHub issue URL.
+
+        :param issue_url: Full GitHub issue URL
+        :return: Tuple of (owner, repo_name, issue_number); the issue number is returned as a string
+        :raises ValueError: If URL format is invalid
+        """
+        # Accept both http and https, matching the issue URL parsers of the sibling components.
+        pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)"
+        match = re.match(pattern, issue_url)
+        if not match:
+            raise ValueError("Invalid GitHub issue URL format")
+        return match.group(1), match.group(2), match.group(3)
+
+    def _get_authenticated_user(self) -> str:
+        """
+        Get the username of the authenticated user (fork owner).
+
+        :raises requests.RequestException: If the API request fails
+        """
+        response = requests.get(
+            "https://api.github.com/user",
+            headers=self._get_headers()
+        )
+        response.raise_for_status()
+        return response.json()["login"]
+
+    def _check_fork_exists(self, owner: str, repo: str, fork_owner: str) -> bool:
+        """
+        Check if the fork exists.
+
+        Looks up `fork_owner/repo` and returns the `fork` flag of that repository. `owner` is not
+        used: the parent of the fork is not compared against the target repository. Any request
+        error (including authentication failures) is reported as False.
+        """
+        url = f"https://api.github.com/repos/{fork_owner}/{repo}"
+        try:
+            response = requests.get(url, headers=self._get_headers())
+            response.raise_for_status()
+            fork_data = response.json()
+            return fork_data.get("fork", False)
+        except requests.RequestException:
+            return False
+
+    @component.output_types(result=str)
+    def run(
+        self,
+        issue_url: str,
+        title: str,
+        branch: str,
+        base: str,
+        body: str = "",
+        draft: bool = False
+    ) -> Dict[str, str]:
+        """
+        Create a new pull request from your fork to the original repository, linked to the specified issue.
+
+        :param issue_url: URL of the GitHub issue to link the PR to
+        :param title: Title of the pull request
+        :param branch: Name of the branch in your fork where changes are implemented
+        :param base: Name of the branch in the original repo you want to merge into
+        :param body: Additional content for the pull request description
+        :param draft: Whether to create a draft pull request
+        :return: Dictionary containing operation result
+        """
+        try:
+            # Parse repository information from issue URL
+            owner, repo_name, issue_number = self._parse_issue_url(issue_url)
+
+            # Get the authenticated user (fork owner)
+            fork_owner = self._get_authenticated_user()
+
+            # Check if the fork exists
+            if not self._check_fork_exists(owner, repo_name, fork_owner):
+                return {"result": f"Error: Fork not found at {fork_owner}/{repo_name}"}
+
+            url = f"https://api.github.com/repos/{owner}/{repo_name}/pulls"
+
+            # For cross-repository PRs, head must be in the format username:branch
+            head = f"{fork_owner}:{branch}"
+
+            # NOTE(review): nothing in this request references the issue; `issue_number` is only used
+            # in the result message below. The PR ends up linked only if `title` or `body` mention
+            # the issue - confirm whether a reference should be added here.
+            pr_data = {
+                "title": title,
+                "body": body,
+                "head": head,
+                "base": base,
+                "draft": draft,
+                "maintainer_can_modify": True,  # Allow maintainers to modify the PR
+            }
+
+            response = requests.post(url, headers=self._get_headers(), json=pr_data)
+            response.raise_for_status()
+            pr_number = response.json()["number"]
+
+            return {"result": f"Pull request #{pr_number} created successfully and linked to issue #{issue_number}"}
+
+        except (requests.RequestException, ValueError) as e:
+            if self.raise_on_failure:
+                raise
+            return {"result": f"Error: {str(e)}"}
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize the component to a dictionary."""
+        return default_to_dict(
+            self,
+            github_token=self.github_token.to_dict() if self.github_token else None,
+            raise_on_failure=self.raise_on_failure
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GithubPRCreator":
+        """Deserialize the component from a dictionary."""
+        init_params = data["init_parameters"]
+
import base64
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union
from urllib.parse import quote

import requests
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace

logger = logging.getLogger(__name__)


@dataclass
class GitHubItem:
    """Represents an item (file or directory) in a GitHub repository"""

    name: str
    type: str  # "file" or "dir"
    path: str
    size: int
    url: str
    content: Optional[str] = None


@component
class GithubRepositoryViewer:
    """
    Navigates and fetches content from GitHub repositories.

    For directories:
    - Returns a list of Documents, one for each item
    - Each Document's content is the item name
    - Full path and metadata in Document.meta

    For files:
    - Returns a single Document
    - Document's content is the file content
    - Full path and metadata in Document.meta

    For errors:
    - Returns a single Document
    - Document's content is the error message
    - Document's meta contains type="error"

    ### Usage example
    ```python
    from github_haystack.github_components.repo_viewer import GithubRepositoryViewer
    from haystack.utils import Secret

    # Using token directly
    viewer = GithubRepositoryViewer(github_token=Secret.from_token("your_token"))

    # Using environment variable
    viewer = GithubRepositoryViewer(github_token=Secret.from_env_var("GITHUB_TOKEN"))

    # List directory contents - returns multiple documents
    result = viewer.run(
        repo="owner/repository",
        path="docs/",
        branch="main"
    )

    # Get specific file - returns single document
    result = viewer.run(
        repo="owner/repository",
        path="README.md",
        branch="main"
    )
    ```
    """

    # Seconds to wait for each GitHub API call; without a timeout `requests` can block forever.
    _REQUEST_TIMEOUT = 30

    def __init__(
        self,
        github_token: Optional[Secret] = None,
        raise_on_failure: bool = True,
        max_file_size: int = 1_000_000,  # 1MB default limit
        repo: Optional[str] = None,
        branch: Optional[str] = None,
    ):
        """
        Initialize the component.

        :param github_token: GitHub personal access token for API authentication
        :param raise_on_failure: If True, raises exceptions on API errors
        :param max_file_size: Maximum file size in bytes to fetch (default: 1MB)
        :param repo: Default repository in "owner/repo" format, used when `run` gets none
        :param branch: Default git reference, used when `run` gets none
        :raises TypeError: If `github_token` is given but is not a `Secret`
        """
        if github_token is not None and not isinstance(github_token, Secret):
            raise TypeError("github_token must be a Secret")

        self.github_token = github_token
        self.raise_on_failure = raise_on_failure
        self.max_file_size = max_file_size
        self.repo = repo
        self.branch = branch

        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Haystack/GithubRepositoryViewer",
        }

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the component to a dictionary.

        Uses the same `type` / `init_parameters` envelope as the other GitHub components
        and includes every init parameter, so that a round trip restores `repo` and `branch`.

        :returns: Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            github_token=self.github_token.to_dict() if self.github_token else None,
            raise_on_failure=self.raise_on_failure,
            max_file_size=self.max_file_size,
            repo=self.repo,
            branch=self.branch,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GithubRepositoryViewer":
        """
        Deserialize the component from a dictionary.

        :param data: Dictionary to deserialize from. Either the `type` / `init_parameters`
            envelope produced by `to_dict`, or the flat dictionary of init parameters
            that earlier versions of `to_dict` produced.
        :returns: Deserialized component.
        """
        if "init_parameters" in data:
            deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"])
            return default_from_dict(cls, data)

        # Legacy flat format: the dictionary itself holds the init parameters.
        init_params = dict(data)
        if init_params.get("github_token"):
            init_params["github_token"] = Secret.from_dict(init_params["github_token"])
        return cls(**init_params)

    def _parse_repo(self, repo: str) -> tuple[str, str]:
        """
        Parse owner/repo string.

        :raises ValueError: If `repo` does not consist of exactly two "/"-separated parts
        """
        parts = repo.split("/")
        if len(parts) != 2:
            raise ValueError(
                f"Invalid repository format. Expected 'owner/repo', got '{repo}'"
            )
        return parts[0], parts[1]

    def _normalize_path(self, path: str) -> str:
        """Normalize repository path by stripping leading and trailing slashes"""
        return path.strip("/")

    def _fetch_contents(self, owner: str, repo: str, path: str, ref: Optional[str]) -> Any:
        """
        Fetch repository contents from GitHub API.

        :param owner: Repository owner
        :param repo: Repository name
        :param path: Normalized path inside the repository ("" for the root)
        :param ref: Git reference to read from; the API default is used when empty
        :return: Decoded JSON response (a dict for a file, a list for a directory)
        :raises requests.RequestException: If the API call fails
        """
        # Quote the path ("/" stays literal) so names containing "#", "?" or spaces survive.
        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{quote(path)}"
        # Let requests encode the ref; branch names may contain characters such as "#" or "&".
        params = {"ref": ref} if ref else None

        headers = self.headers.copy()
        if self.github_token:
            headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}"

        response = requests.get(url, headers=headers, params=params, timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def _process_file_content(self, content: str, encoding: str) -> str:
        """Decode base64 content as UTF-8 text; any other encoding is returned unchanged"""
        if encoding == "base64":
            return base64.b64decode(content).decode("utf-8")
        return content

    def _create_file_document(self, item: GitHubItem) -> Document:
        """Create a Document from a file (falls back to the file name when content is empty)"""
        return Document(
            content=item.content if item.content else item.name,
            meta={
                "path": item.path,
                "type": "file_content",
                "size": item.size,
                "url": item.url,
            },
        )

    def _create_directory_documents(self, items: List[GitHubItem]) -> List[Document]:
        """Create a list of Documents from directory contents, directories first, then by name"""
        return [
            Document(
                content=item.name,
                meta={
                    "path": item.path,
                    "type": item.type,
                    "size": item.size,
                    "url": item.url,
                },
            )
            for item in sorted(items, key=lambda x: (x.type != "dir", x.name.lower()))
        ]

    def _create_error_document(self, error: Union[Exception, str], path: str) -> Document:
        """Create a Document from an error (exception or ready-made message)"""
        return Document(
            content=str(error),
            meta={
                "type": "error",
                "path": path,
            },
        )

    @component.output_types(documents=List[Document])
    def run(
        self, path: str, repo: Optional[str] = None, branch: Optional[str] = None
    ) -> Dict[str, List[Document]]:
        """
        Process a GitHub repository path and return documents.

        :param path: Path within repository (pass "" or "/" for the repository root)
        :param repo: Repository in format "owner/repo"; defaults to the value given at init
        :param branch: Git reference (branch, tag, commit) to use; defaults to the value given at init
        :return: Dictionary containing list of documents
        :raises Exception: Any error raised while fetching or processing is re-raised
            when `raise_on_failure` is True; otherwise a single error document is returned
        """
        if repo is None:
            repo = self.repo
        if branch is None:
            branch = self.branch

        try:
            if not repo:
                # Fail with a clear message instead of an AttributeError on None.split.
                raise ValueError(
                    "No repository specified. Pass 'repo' to run() or set it at initialization."
                )

            owner, repo_name = self._parse_repo(repo)
            normalized_path = self._normalize_path(path)

            contents = self._fetch_contents(owner, repo_name, normalized_path, branch)

            # Handle single file response
            if not isinstance(contents, list):
                if contents.get("size", 0) > self.max_file_size:
                    raise ValueError(
                        f"File size {contents['size']} exceeds limit of {self.max_file_size}"
                    )

                item = GitHubItem(
                    name=contents["name"],
                    type="file",
                    path=contents["path"],
                    size=contents["size"],
                    url=contents["html_url"],
                    content=self._process_file_content(
                        contents["content"], contents["encoding"]
                    ),
                )
                return {"documents": [self._create_file_document(item)]}

            # Handle directory listing
            items = [
                GitHubItem(
                    name=item["name"],
                    type="dir" if item["type"] == "dir" else "file",
                    path=item["path"],
                    size=item.get("size", 0),
                    url=item["html_url"],
                )
                for item in contents
            ]

            return {"documents": self._create_directory_documents(items)}

        except Exception as e:
            if self.raise_on_failure:
                raise
            logger.warning(
                "Error processing repository path {path}: {error}",
                path=path,
                error=str(e),
            )

            message = f"Error processing repository path {path}: {str(e)}."
            # Only suggest a missing file when GitHub actually answered 404; other failures
            # (size limit, authentication, rate limit) would make that hint misleading.
            response = getattr(e, "response", None)
            if isinstance(e, requests.HTTPError) and getattr(response, "status_code", None) == 404:
                message += " Seems like the file does not exist."
            return {"documents": [self._create_error_document(message, path)]}
import re
import time
from typing import Any, Dict, Optional

import requests
from haystack import component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace

logger = logging.getLogger(__name__)


@component
class GithubRepoForker:
    """
    Forks a GitHub repository from an issue URL.

    The component takes a GitHub issue URL, extracts the repository information,
    creates or syncs a fork of that repository, and optionally creates an issue-specific branch.

    ### Usage example
    ```python
    from github_haystack.github_components.repository_forker import GithubRepoForker
    from haystack.utils import Secret

    # Using direct token with auto-sync and branch creation
    forker = GithubRepoForker(
        github_token=Secret.from_token("your_token"),
        auto_sync=True,
        create_branch=True
    )

    result = forker.run(url="https://github.com/owner/repo/issues/123")
    # Will create or sync fork and create branch "fix-123"
    ```
    """

    # Seconds to wait for each GitHub API call; without a timeout `requests` can block forever.
    _REQUEST_TIMEOUT = 30

    def __init__(
        self,
        github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"),
        raise_on_failure: bool = True,
        wait_for_completion: bool = False,
        max_wait_seconds: int = 300,
        poll_interval: int = 2,
        auto_sync: bool = True,
        create_branch: bool = True,
    ):
        """
        Initialize the component.

        :param github_token: GitHub personal access token for API authentication
        :param raise_on_failure: If True, raises exceptions on API errors
        :param wait_for_completion: If True, waits until fork is fully created
        :param max_wait_seconds: Maximum time to wait for fork completion in seconds
        :param poll_interval: Time between status checks in seconds
        :param auto_sync: If True, syncs fork with original repository if it already exists
        :param create_branch: If True, creates a fix branch based on the issue number
        :raises TypeError: If `github_token` is not a `Secret`
        """
        if not isinstance(github_token, Secret):
            raise TypeError("github_token must be a Secret")

        self.github_token = github_token
        self.raise_on_failure = raise_on_failure
        self.wait_for_completion = wait_for_completion
        self.max_wait_seconds = max_wait_seconds
        self.poll_interval = poll_interval
        self.auto_sync = auto_sync
        self.create_branch = create_branch

        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Haystack/GithubRepoForker",
        }

    def _auth_headers(self) -> Dict[str, str]:
        """Return the base headers plus an Authorization header with the resolved token."""
        return {**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}

    def _parse_github_url(self, url: str) -> tuple[str, str, str]:
        """
        Parse GitHub URL into owner, repo, and issue number.

        :param url: GitHub issue URL
        :return: Tuple of (owner, repo, issue_number)
        :raises ValueError: If URL format is invalid
        """
        pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)"
        match = re.match(pattern, url)
        if not match:
            raise ValueError(f"Invalid GitHub issue URL format: {url}")

        owner, repo, issue_number = match.groups()
        return owner, repo, issue_number

    def _check_fork_status(self, fork_path: str) -> bool:
        """
        Check if a forked repository exists and is ready.

        :param fork_path: Repository path in owner/repo format
        :return: True if the repository endpoint answers 200, False otherwise
        """
        url = f"https://api.github.com/repos/{fork_path}"
        try:
            response = requests.get(url, headers=self._auth_headers(), timeout=self._REQUEST_TIMEOUT)
            return response.status_code == 200
        except requests.RequestException:
            return False

    def _get_authenticated_user(self) -> str:
        """
        Get the authenticated user's username.

        :return: Username of the authenticated user
        :raises requests.RequestException: If API call fails
        """
        url = "https://api.github.com/user"
        response = requests.get(url, headers=self._auth_headers(), timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()["login"]

    def _get_existing_repository(self, repo_name: str, user: Optional[str] = None) -> Optional[str]:
        """
        Check if a repository with the given name already exists in the authenticated user's account.

        Only the name is checked; this does not verify that the repository is a fork of
        any particular upstream.

        :param repo_name: Repository name to check
        :param user: Login of the authenticated user, if already known; looked up when omitted
        :return: `repo_name` if the repository exists, None otherwise
        :raises requests.RequestException: If `user` is omitted and the user lookup fails
        """
        owner = user if user is not None else self._get_authenticated_user()
        url = f"https://api.github.com/repos/{owner}/{repo_name}"
        try:
            response = requests.get(url, headers=self._auth_headers(), timeout=self._REQUEST_TIMEOUT)
            if response.status_code == 200:
                return repo_name
            return None
        except requests.RequestException as e:
            logger.warning("Failed to check repository existence: {error}", error=str(e))
            return None

    def _get_default_branch(self, repo_path: str) -> str:
        """
        Look up the default branch of a repository.

        :param repo_path: Repository path in owner/repo format
        :return: Name of the default branch
        :raises requests.RequestException: If the API call fails
        """
        url = f"https://api.github.com/repos/{repo_path}"
        response = requests.get(url, headers=self._auth_headers(), timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()["default_branch"]

    def _sync_fork(self, fork_path: str, branch: Optional[str] = None) -> None:
        """
        Sync a fork with its upstream repository.

        :param fork_path: Fork path in owner/repo format
        :param branch: Branch of the fork to sync; the fork's default branch when omitted
            (not every repository uses "main")
        :raises requests.RequestException: If sync fails
        """
        if branch is None:
            branch = self._get_default_branch(fork_path)

        url = f"https://api.github.com/repos/{fork_path}/merge-upstream"
        response = requests.post(
            url,
            headers=self._auth_headers(),
            json={"branch": branch},
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()

    def _create_issue_branch(self, fork_path: str, issue_number: str) -> None:
        """
        Create a new branch for the issue.

        The branch is named `fix-<issue_number>` and starts at the head of the fork's
        default branch. An already existing branch of that name is left untouched.

        :param fork_path: Fork path in owner/repo format
        :param issue_number: Issue number to use in branch name
        :raises requests.RequestException: If branch creation fails
        """
        default_branch = self._get_default_branch(fork_path)

        # Get the SHA of the default branch
        url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}"
        response = requests.get(url, headers=self._auth_headers(), timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        sha = response.json()["object"]["sha"]

        # Create the new branch
        branch_name = f"fix-{issue_number}"
        url = f"https://api.github.com/repos/{fork_path}/git/refs"
        response = requests.post(
            url,
            headers=self._auth_headers(),
            json={"ref": f"refs/heads/{branch_name}", "sha": sha},
            timeout=self._REQUEST_TIMEOUT,
        )
        # GitHub answers 422 "Reference already exists" when the component is re-run for the
        # same issue; the branch is there, so treat that as success rather than an error.
        if response.status_code == 422 and "already exists" in response.text.lower():
            logger.info("Branch {branch} already exists, reusing it", branch=branch_name)
            return
        response.raise_for_status()

    def _create_fork(self, owner: str, repo: str) -> str:
        """
        Create a fork of the repository.

        :param owner: Original repository owner
        :param repo: Repository name
        :return: Fork path in owner/repo format
        :raises requests.RequestException: If fork creation fails
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/forks"
        response = requests.post(url, headers=self._auth_headers(), timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()

        fork_data = response.json()
        return f"{fork_data['owner']['login']}/{fork_data['name']}"

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the component to a dictionary.

        :returns: Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            github_token=self.github_token.to_dict() if self.github_token else None,
            raise_on_failure=self.raise_on_failure,
            wait_for_completion=self.wait_for_completion,
            max_wait_seconds=self.max_wait_seconds,
            poll_interval=self.poll_interval,
            auto_sync=self.auto_sync,
            create_branch=self.create_branch,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GithubRepoForker":
        """
        Deserialize the component from a dictionary.

        :param data: Dictionary to deserialize from.
        :returns: Deserialized component.
        """
        init_params = data["init_parameters"]
        deserialize_secrets_inplace(init_params, keys=["github_token"])
        return default_from_dict(cls, data)

    @component.output_types(repo=str, issue_branch=str)
    def run(self, url: str) -> dict:
        """
        Process a GitHub issue URL and create or sync a fork of the repository.

        :param url: GitHub issue URL
        :return: Dictionary with `repo` (fork path in owner/repo format) and `issue_branch`
            (name of the created branch, or None when branch creation is disabled).
            On failure with `raise_on_failure=False`, `repo` is "" and `issue_branch` is None.
        :raises Exception: Any error is re-raised when `raise_on_failure` is True
        """
        try:
            # Extract repository information
            owner, repo, issue_number = self._parse_github_url(url)

            # Check if fork already exists
            user = self._get_authenticated_user()
            existing_fork = self._get_existing_repository(repo, user=user)

            if existing_fork and self.auto_sync:
                # If fork exists and auto_sync is enabled, sync with upstream
                fork_path = f"{user}/{repo}"
                logger.info("Fork already exists, syncing with upstream repository")
                self._sync_fork(fork_path)
            else:
                # Create new fork
                fork_path = self._create_fork(owner, repo)

                # Wait for fork completion if requested
                if self.wait_for_completion:
                    start_time = time.time()

                    while time.time() - start_time < self.max_wait_seconds:
                        if self._check_fork_status(fork_path):
                            logger.info("Fork creation completed successfully")
                            break
                        logger.debug("Waiting for fork creation to complete...")
                        time.sleep(self.poll_interval)
                    else:
                        # Loop ended without `break`: the fork never became reachable.
                        msg = f"Fork creation timed out after {self.max_wait_seconds} seconds"
                        if self.raise_on_failure:
                            raise TimeoutError(msg)
                        logger.warning(msg)

            # Create issue branch if enabled
            issue_branch = None
            if self.create_branch:
                issue_branch = f"fix-{issue_number}"
                logger.info("Creating branch for issue #{issue_number}", issue_number=issue_number)
                self._create_issue_branch(fork_path, issue_number)

            return {"repo": fork_path, "issue_branch": issue_branch}

        except Exception as e:
            if self.raise_on_failure:
                raise
            logger.warning("Error forking repository from {url}: {error}", url=url, error=str(e))
            return {"repo": "", "issue_branch": None}
Apache-2.0 From 890ecde0864f2e4ce99d91b33a95f10fee0b2f8e Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 10 Apr 2025 15:20:13 +0200 Subject: [PATCH 02/51] rename to github_haystack --- integrations/github_haystack/LICENSE.txt | 73 +++++ integrations/github_haystack/README.md | 21 ++ integrations/github_haystack/pyproject.toml | 61 ++++ .../src/github_haystack/__about__.py | 4 + .../src/github_haystack/__init__.py | 3 + .../github_haystack/agent_prompts/__init__.py | 8 + .../agent_prompts/comment_tool.py | 22 ++ .../github_haystack/agent_prompts/context.py | 175 ++++++++++ .../agent_prompts/file_editor_tool.py | 130 ++++++++ .../agent_prompts/pr_system_prompt.py | 53 ++++ .../agent_prompts/repo_viewer_tool.py | 78 +++++ .../agent_prompts/system_prompt.py | 61 ++++ .../github_components/file_editor.py | 299 ++++++++++++++++++ .../github_components/issue_commenter.py | 155 +++++++++ .../github_components/issue_viewer.py | 218 +++++++++++++ .../github_components/pr_creator.py | 171 ++++++++++ .../github_components/repo_viewer.py | 263 +++++++++++++++ .../github_components/repository_forker.py | 298 +++++++++++++++++ .../github_haystack/tests/__init__.py | 3 + 19 files changed, 2096 insertions(+) create mode 100644 integrations/github_haystack/LICENSE.txt create mode 100644 integrations/github_haystack/README.md create mode 100644 integrations/github_haystack/pyproject.toml create mode 100644 integrations/github_haystack/src/github_haystack/__about__.py create mode 100644 integrations/github_haystack/src/github_haystack/__init__.py create mode 100644 integrations/github_haystack/src/github_haystack/agent_prompts/__init__.py create mode 100644 integrations/github_haystack/src/github_haystack/agent_prompts/comment_tool.py create mode 100644 integrations/github_haystack/src/github_haystack/agent_prompts/context.py create mode 100644 integrations/github_haystack/src/github_haystack/agent_prompts/file_editor_tool.py create mode 100644 
integrations/github_haystack/src/github_haystack/agent_prompts/pr_system_prompt.py create mode 100644 integrations/github_haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py create mode 100644 integrations/github_haystack/src/github_haystack/agent_prompts/system_prompt.py create mode 100644 integrations/github_haystack/src/github_haystack/github_components/file_editor.py create mode 100644 integrations/github_haystack/src/github_haystack/github_components/issue_commenter.py create mode 100644 integrations/github_haystack/src/github_haystack/github_components/issue_viewer.py create mode 100644 integrations/github_haystack/src/github_haystack/github_components/pr_creator.py create mode 100644 integrations/github_haystack/src/github_haystack/github_components/repo_viewer.py create mode 100644 integrations/github_haystack/src/github_haystack/github_components/repository_forker.py create mode 100644 integrations/github_haystack/tests/__init__.py diff --git a/integrations/github_haystack/LICENSE.txt b/integrations/github_haystack/LICENSE.txt new file mode 100644 index 0000000000..137069b823 --- /dev/null +++ b/integrations/github_haystack/LICENSE.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/integrations/github_haystack/README.md b/integrations/github_haystack/README.md new file mode 100644 index 0000000000..a816d2c6d0 --- /dev/null +++ b/integrations/github_haystack/README.md @@ -0,0 +1,21 @@ +# github-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/github-haystack.svg)](https://pypi.org/project/github-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/github-haystack.svg)](https://pypi.org/project/github-haystack) + +----- + +## Table of Contents + +- [Installation](#installation) +- [License](#license) + +## Installation + +```console +pip install github-haystack +``` + +## License + +`github-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
diff --git a/integrations/github_haystack/pyproject.toml b/integrations/github_haystack/pyproject.toml new file mode 100644 index 0000000000..539aa0c09a --- /dev/null +++ b/integrations/github_haystack/pyproject.toml @@ -0,0 +1,61 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "github-haystack" +dynamic = ["version"] +description = 'Haystack components for interacting with GitHub repositories' +readme = "README.md" +requires-python = ">=3.8" +license = "Apache-2.0" +keywords = [] +authors = [ + { name = "deepset GmbH", email = "info@deepset.ai" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [] + +[project.urls] +Documentation = "https://github.com/deepset GmbH/github-haystack#readme" +Issues = "https://github.com/deepset GmbH/github-haystack/issues" +Source = "https://github.com/deepset GmbH/github-haystack" + +[tool.hatch.version] +path = "src/github_haystack/__about__.py" + +[tool.hatch.envs.types] +extra-dependencies = [ + "mypy>=1.0.0", +] +[tool.hatch.envs.types.scripts] +check = "mypy --install-types --non-interactive {args:src/github_haystack tests}" + +[tool.coverage.run] +source_pkgs = ["github_haystack", "tests"] +branch = true +parallel = true +omit = [ + "src/github_haystack/__about__.py", +] + +[tool.coverage.paths] +github_haystack = ["src/github_haystack", "*/github-haystack/src/github_haystack"] +tests = ["tests", "*/github-haystack/tests"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] diff --git 
a/integrations/github_haystack/src/github_haystack/__about__.py b/integrations/github_haystack/src/github_haystack/__about__.py new file mode 100644 index 0000000000..348b27fe81 --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/__about__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2025-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +__version__ = "0.0.1" diff --git a/integrations/github_haystack/src/github_haystack/__init__.py b/integrations/github_haystack/src/github_haystack/__init__.py new file mode 100644 index 0000000000..d391382c6b --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2025-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/__init__.py b/integrations/github_haystack/src/github_haystack/agent_prompts/__init__.py new file mode 100644 index 0000000000..984d948c61 --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/agent_prompts/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from .repo_viewer_tool import repo_viewer_prompt, repo_viewer_schema +from .system_prompt import issue_prompt + +_all_ = ["issue_prompt", "repo_viewer_prompt", "repo_viewer_schema"] diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/comment_tool.py b/integrations/github_haystack/src/github_haystack/agent_prompts/comment_tool.py new file mode 100644 index 0000000000..27baf46e5c --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/agent_prompts/comment_tool.py @@ -0,0 +1,22 @@ +comment_prompt = """ +Haystack-Agent uses this tool to post a comment to a Github-issue discussion. + + +Pass a `comment` string to post a comment. + + +IMPORTANT +Haystack-Agent MUST pass "comment" to this tool. Otherwise, comment creation fails. 
+Haystack-Agent always passes the contents of the comment to the "comment" parameter when calling this tool. +""" + +comment_schema = { + "properties": { + "comment": { + "type": "string", + "description": "The contents of the comment that you want to create." + } + }, + "required": ["comment"], + "type": "object" +} diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/context.py b/integrations/github_haystack/src/github_haystack/agent_prompts/context.py new file mode 100644 index 0000000000..ff45fccde9 --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/agent_prompts/context.py @@ -0,0 +1,175 @@ +haystack_context_prompt = """ + +Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related +questions. +The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. +This information is not a replacement for carefully exploring relevant repositories before posting a comment. + +**Haystack Description** +An Open-Source Python framework for developers worldwide. +AI orchestration framework to build customizable, production-ready LLM applications. +Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. +With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or +conversational agent chatbots. + +**High-Level Architecture** +Haystack has two central abstractions: +- Components +- Pipelines + +A Component is a lightweight abstraction that gets inputs, performs an action and returns outputs. +Some example components: +- `OpenAIGenerator`: receives a prompt and generates replies to the prompt by calling an OpenAI-model +- `MetadataRouter`: routes documents to configurable outputs based on their metadata +- `BM25Retriever`: retrieves documents from a 'DocumentStore' based on the 'query'-input + +A component is lightweight. 
It is easy to implement custom components. Here is some information from the docs: + +Requirements + +Here are the requirements for all custom components: + +- `@component`: This decorator marks a class as a component, allowing it to be used in a pipeline. +- `run()`: This is a required method in every component. It accepts input arguments and returns a `dict`. The inputs can +either come from the pipeline when it’s executed, or from the output of another component when connected using +`connect()`. The `run()` method should be compatible with the input/output definitions declared for the component. +See an [Extended Example](#extended-example) below to check how it works. + +## Inputs and Outputs + +Next, define the inputs and outputs for your component. + +### Inputs + +You can choose between three input options: + +- `set_input_type`: This method defines or updates a single input socket for a component instance. It’s ideal for adding +or modifying a specific input at runtime without affecting others. Use this when you need to dynamically set or modify +a single input based on specific conditions. +- `set_input_types`: This method allows you to define multiple input sockets at once, replacing any existing inputs. +It’s useful when you know all the inputs the component will need and want to configure them in bulk. Use this when you +want to define multiple inputs during initialization. +- Declaring arguments directly in the `run()` method. Use this method when the component’s inputs are static and known +at the time of class definition. + +### Outputs + +You can choose between two output options: + +- `@component.output_types`: This decorator defines the output types and names at the time of class definition. The +output names and types must match the `dict` returned by the `run()` method. Use this when the output types are static +and known in advance. This decorator is cleaner and more readable for static components. 
+- `set_output_types`: This method defines or updates multiple output sockets for a component instance at runtime. +It’s useful when you need flexibility in configuring outputs dynamically. Use this when the output types need to be set +at runtime for greater flexibility. + +# Short Example + +Here is an example of a simple minimal component setup: + +```python +from haystack import component + +@component +class WelcomeTextGenerator: + ''' + A component generating personal welcome message and making it upper case + ''' + @component.output_types(welcome_text=str, note=str) + def run(self, name:str): + return {"welcome_text": f'Hello {name}, welcome to Haystack!'.upper(), "note": "welcome message is ready"} + +``` + +Here, the custom component `WelcomeTextGenerator` accepts one input: `name` string and returns two outputs: +`welcome_text` and `note`. + + +---------- + +**Pipelines** +The pipelines in Haystack 2.0 are directed multigraphs of different Haystack components and integrations. +They give you the freedom to connect these components in various ways. This means that the +pipeline doesn't need to be a continuous stream of information. With the flexibility of Haystack pipelines, +you can have simultaneous flows, standalone components, loops, and other types of connections. + +# Steps to Create a Pipeline Explained + +Once all your components are created and ready to be combined in a pipeline, there are four steps to make it work: + +1. Create the pipeline with `Pipeline()`. + This creates the Pipeline object. +2. Add components to the pipeline, one by one, with `.add_component(name, component)`. + This just adds components to the pipeline without connecting them yet. It's especially useful for loops as it allows + the smooth connection of the components in the next step because they all already exist in the pipeline. +3. Connect components with `.connect("producer_component.output_name", "consumer_component.input_name")`. 
+ At this step, you explicitly connect one of the outputs of a component to one of the inputs of the next component. + This is also when the pipeline validates the connection without running the components. It makes the validation fast. +4. Run the pipeline with `.run({"component_1": {"mandatory_inputs": value}})`. + Finally, you run the Pipeline by specifying the first component in the pipeline and passing its mandatory inputs. + + Optionally, you can pass inputs to other components, for example: + `.run({"component_1": {"mandatory_inputs": value}, "component_2": {"inputs": value}})`. + +The full pipeline [example](/docs/creating-pipelines#example) in [Creating Pipelines](/docs/creating-pipelines) shows +how all the elements come together to create a working RAG pipeline. + +Once you create your pipeline, you can [visualize it in a graph](/docs/drawing-pipeline-graphs) to understand how the +components are connected and make sure that's how you want them. You can use Mermaid graphs to do that. + +# Validation + +Validation happens when you connect pipeline components with `.connect()`, but before running the components to make it +faster. The pipeline validates that: + +- The components exist in the pipeline. +- The components' outputs and inputs match and are explicitly indicated. For example, if a component produces two +outputs, when connecting it to another component, you must indicate which output connects to which input. +- The components' types match. +- For input types other than `Variadic`, checks if the input is already occupied by another connection. + +All of these checks produce detailed errors to help you quickly fix any issues identified. + +# Serialization + +Thanks to serialization, you can save and then load your pipelines. Serialization is converting a Haystack pipeline +into a format you can store on disk or send over the wire. It's particularly useful for: + +- Editing, storing, and sharing pipelines. 
+- Modifying existing pipelines in a format different than Python. + +Haystack pipelines delegate the serialization to its components, so serializing a pipeline simply means serializing +each component in the pipeline one after the other, along with their connections. The pipeline is serialized into a +dictionary format, which acts as an intermediate format that you can then convert into the final format you want. + +> 📘 Serialization formats +> +> Haystack 2.0 only supports YAML format at this time. We'll be rolling out more formats gradually. + +For serialization to be possible, components must support conversion from and to Python dictionaries. All Haystack +components have two methods that make them serializable: `from_dict` and `to_dict`. The `Pipeline` class, in turn, has +its own `from_dict` and `to_dict` methods that take care of serializing components and connections. + + +--------- + +**Haystack Repositories** + +1. "deepset-ai/haystack" + +Contains the core code for the Haystack framework and a few components. +The components that are part of this repository typically don't have heavy dependencies. + + +2. "deepset-ai/haystack-core-integrations" + +This is a mono-repo maintained by the deepset-Team that contains integrations for the Haystack framework. +Typically, an integration consists of one or more components. Some integrations only contain document stores. +Each integration is a standalone pypi-package but you can find all of them in the core integrations repo. + + +3. "deepset-ai/haystack-experimental" + +Contains experimental features for the Haystack framework. 
+ +""" diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/file_editor_tool.py b/integrations/github_haystack/src/github_haystack/agent_prompts/file_editor_tool.py new file mode 100644 index 0000000000..61ac77b0d5 --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/agent_prompts/file_editor_tool.py @@ -0,0 +1,130 @@ +file_editor_prompt = """ +Use the file editor to edit an existing file in the repository. + +You must provide a 'command' for the action that you want to perform: +- edit +- create +- delete +- undo + +The 'payload' contains your options for each command. + +**Command 'edit'** + +To edit a file, you need to provide: +1. The path to the file +2. The original code snippet from the file +3. Your replacement code +4. A commit message + +The code will only be replaced if it is unique in the file. Pass a minimum of 2 consecutive lines that should +be replaced. If the original is not unique, the editor will return an error. +Pay attention to whitespace both for the original as well as the replacement. + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +Example: +{ + "command": "edit", + "payload": { + "path": "README.md", + "original": "This is a placeholder description!\\nIt should be updated.", + "replacement": "This project helps developers test AI applications.", + "message": "docs: README should mention project purpose." + } +} + + +**Command 'create'** + +To create a file, you need to provide: +1. The path for the new file +2. The content for the file +3. A commit message + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +IMPORTANT: +You MUST ALWAYS provide 'content' when creating a new file. File creation with empty content does not work. 
+ +Example: +{ + "command": "create", + "payload": { + "path": "CONTRIBUTING.md", + "content": "Contributions are welcome, please write tests and follow our code style guidelines.", + "message": "chore: minimal instructions for contributors" + } +} + + +**Command 'delete'** + +To delete a file, you need to provide: +1. The path to the file to delete +2. A commit message + +The commit message should be short and communicate your intention. +Use the conventional commit style for your messages. + +Example: +{ + "command": "delete", + "payload": { + "path": "tests/components/test_messaging", + "message": "chore: messaging feature was removed" + } +} + +**Command 'undo'** + +This is how to undo your latest change. + +Important notes: +- You can only undo your own changes +- You can only undo one change at a time +- You need to provide a message for the undo operation + +Example: +{ + "command": "undo", + "payload": { + "message": "revert: undo previous commit due to failing tests" + } +} +""" + +file_editor_schema = { + "type": "object", + "properties": { + "command": { + "type": "string", + "enum": ["edit", "create", "delete", "undo"], + "description": "The command to execute" + }, + "payload": { + "type": "object", + "required": ["message"], + "properties": { + "message": { + "type": "string" + }, + "content": { + "type": "string" + }, + "path": { + "type": "string" + }, + "original": { + "type": "string" + }, + "replacement": { + "type": "string" + } + } + } + }, + "required": ["command", "payload"] +} diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/pr_system_prompt.py b/integrations/github_haystack/src/github_haystack/agent_prompts/pr_system_prompt.py new file mode 100644 index 0000000000..a99c8ae100 --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/agent_prompts/pr_system_prompt.py @@ -0,0 +1,53 @@ +system_prompt = """ +The assistant is Haystack-Agent, created by deepset. 
+Haystack-Agent creates Pull Requests that resolve GitHub issues. + +Haystack-Agent receives a GitHub issue and all current comments. +Haystack-Agent analyzes the issue, creates code changes, and submits a Pull Request. + +**Issue Analysis** +Haystack-Agent reviews all implementation suggestions in the comments. +Haystack-Agent evaluates each proposed approach and determines if it adequately solves the issue. +Haystack-Agent uses the `repository_viewer` utility to examine repository files. +Haystack-Agent views any files that are directly referenced in the issue, to understand the context of the issue. +Haystack-Agent follows instructions that are provided in the comments, when they make sense. + +**Software Engineering** +Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. +Haystack-Agent finds the right level of abstraction and complexity. +When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and +documentation patterns that are already being used in the codebase. +Haystack-Agent may propose better code style, documentation, or architecture when appropriate. +Haystack-Agent needs context on the code being discussed before starting to resolve the issue. +Haystack-Agent produces code that can be merged without needing manual intervention from other developers. +Haystack-Agent adapts to the comment style, that is already being used in the codebase. +It avoids superfluous comments that point out the obvious. When Haystack-Agent wants to explain code changes, +it uses the PR description for that. + +**Thinking Process** +Haystack-Agent thinks thoroughly about each issue. +Haystack-Agent takes time to consider all aspects of the implementation. +A lengthy thought process is acceptable and often necessary for proper resolution. + + +Haystack-Agent notes down any thoughts and observations in the scratchpad, so that it can reference them later. 
+ + +**Resolution Process** +Haystack-Agent follows these steps to resolve issues: + +1. Analyze the issue and comments, noting all proposed implementations +2. Explore the repository from the root (/) directory +3. Examine files referenced in the issue or comments +4. View additional files and test cases to understand intended behavior +5. Create initial test cases to validate the planned solution +6. Edit repository source code to resolve the issue +7. Update test cases to match code changes +8. Handle edge cases and ensure code matches repository style +9. Create a Pull Request using the `create_pr` utility + +**Pull Request Creation** +Haystack-Agent writes clear Pull Request descriptions. +Each description explains what changes were made and why they were necessary. +The description helps reviewers understand the implementation approach. +""" diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py b/integrations/github_haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py new file mode 100644 index 0000000000..df9cad616b --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py @@ -0,0 +1,78 @@ +repo_viewer_prompt = """ +Haystack-Agent uses this tool to browse GitHub repositories. +Haystack-Agent can view directories and files with this tool. + + +Pass a `repo` string for the repository that you want to view. +It is required to pass `repo` to use this tool. +The structure is "owner/repo-name". + +Pass a `path` string for the directory or file that you want to view. +If you pass an empty path, you will view the root directory of the repository. 
+ +Examples: + +- {"repo": "pandas-dev/pandas", "path": ""} + - will show you the root of the pandas repository +- {"repo": "pandas-dev/pandas", "path": "pyproject.toml"} + - will show you the "pyproject.toml"-file of the pandas repository +- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert"} + - will show you the "albert"-directory in the transformers repository +- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert/albert_modelling.py"} + - will show you the source code for the albert model in the transformers repository + + +Haystack-Agent uses the `github_repository_viewer` to view relevant code. +Haystack-Agent starts at the root of the repository. +Haystack-Agent navigates one level at a time using directory listings. +Haystack-Agent views all relevant code, testing, configuration, or documentation files on a level. +It never skips a directory level or guesses full paths. + +Haystack-Agent thinks deeply about the content of a repository. Before Haystack-Agent uses the tool, it reasons about +next steps: + + +- What am I looking for in this location? +- Why is this path potentially relevant? +- What specific files might help solve the issue? +- What patterns or implementations should I look for? + + +After viewing the contents of a file or directory, Haystack-Agent reflects on its observations before moving on: + +- What did I learn from these files? +- What else might be related? +- Where should I look next and why? + + +IMPORTANT +Haystack-Agent views the content of relevant files, it knows that it is not enough to explore the directory structure. +Haystack-Agent needs to read the code to understand it properly. +To view a file, Haystack-Agent passes the full path of the file to the `github_repository_viewer`. +Haystack-Agent never guesses a file or directory path. 
+ +Haystack-Agent takes notes after viewing code: + +- extract important code snippets +- document key functions, classes or configurations +- note key architecture patterns +- relate findings to the original issue +- relate findings to other code that was already viewed +- note down file paths as a reference + +""" + +repo_viewer_schema = { + "properties": { + "repo": { + "type": "string", + "description": "The owner/repository_name that you want to view." + }, + "path": { + "type": "string", + "description": "Path to directory or file to view. Defaults to repository root.", + } + }, + "required": ["repo"], + "type": "object" +} diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/system_prompt.py b/integrations/github_haystack/src/github_haystack/agent_prompts/system_prompt.py new file mode 100644 index 0000000000..14b612a08e --- /dev/null +++ b/integrations/github_haystack/src/github_haystack/agent_prompts/system_prompt.py @@ -0,0 +1,61 @@ +issue_prompt = """ +The assistant is Haystack-Agent, created by deepset. +Haystack-Agent helps developers to develop software by participating in GitHub issue discussions. + +Haystack-Agent receives a GitHub issue and all current comments. +Haystack-Agent participates in the discussion by: +- helping users find answers to their questions +- analyzing bug reports and proposing a fix when necessary +- analyzing feature requests and proposing an implementation +- being a sounding board in architecture discussions and proposing alternative solutions + +**Style** +Haystack-Agent uses Markdown formatting. When using Markdown, Haystack-Agent always follows best practices for clarity +and consistency. +It always uses a single space after hash symbols for headers (e.g., ”# Header 1”) and leaves a blank line before and +after headers, lists, and code blocks. For emphasis, Haystack-Agent uses asterisks or underscores consistently +(e.g., italic or bold). 
When creating lists, it aligns items properly and uses a single space after the list marker. +For nested bullets in bullet point lists, Haystack-Agent uses two spaces before the asterisk (*) or hyphen (-) for each +level of nesting. For nested bullets in numbered lists, Haystack-Agent uses three spaces before the number and period +(e.g., “1.”) for each level of nesting. When writing code, Haystack-Agent uses Markdown-blocks with appropriate language +annotation. + +**Software Engineering** +Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. +Haystack-Agent finds the right level of abstraction and complexity. +When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and +documentation patterns that are already being used in the codebase. +Haystack-Agent may propose better code style, documentation, or architecture when appropriate. +Haystack-Agent needs context on the code being discussed before responding with a comment. +Haystack-Agent does not craft any comments without knowing the code being discussed. +Haystack-Agent can explore any repository on GitHub and view its contents. + +**Exploring Repositories** +Haystack-Agent uses the `repository_viewer` to explore GitHub repositories before crafting a comment. +Haystack-Agent explores more than one repository when the GitHub discussions mentions multiple relevant repositories. + +**Thinking** +Haystack-Agent is a rigorous thinker. It uses -blocks to gather thoughts, reflect on the issue at +hand, and relate its learnings to it. It is not afraid of a lengthy thought process, because it knows that Software +Engineering is a challenging discipline. +Haystack-Agent takes notes on the . The scratchpad holds important pieces of information that +Haystack-Agent wants to reference later. + +**Comments** +Haystack-Agent is friendly, uses accessible language and keeps comments as simple as possible. 
from base64 import b64decode, b64encode
from enum import StrEnum
from typing import Any, Dict, Optional, Sequence, Union

import requests
from haystack import component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace

logger = logging.getLogger(__name__)


class Command(StrEnum):
    """
    Available commands for file operations in GitHub.

    Attributes:
        EDIT: Edit an existing file by replacing content
        UNDO: Revert the last commit if made by the same user
        CREATE: Create a new file
        DELETE: Delete an existing file
    """

    EDIT = "edit"
    UNDO = "undo"
    CREATE = "create"
    DELETE = "delete"


@component
class GithubFileEditor:
    """
    A Haystack component for editing files in GitHub repositories.

    Supports editing, undoing changes, deleting files, and creating new files
    through the GitHub API. Operation outcomes are reported as human-readable
    strings under the ``result`` output key.

    ### Usage example
    ```python
    from github_haystack.github_components.file_editor import Command, GithubFileEditor
    from haystack.utils import Secret

    # Initialize with default repo and branch
    editor = GithubFileEditor(
        github_token=Secret.from_env_var("GITHUB_TOKEN"),
        repo="owner/repo",
        branch="main"
    )

    # Edit a file using default repo and branch
    result = editor.run(
        command=Command.EDIT,
        payload={
            "path": "path/to/file.py",
            "original": "def old_function():",
            "replacement": "def new_function():",
            "message": "Renamed function for clarity"
        }
    )

    # Edit a file in a different repo/branch
    result = editor.run(
        command=Command.EDIT,
        repo="other-owner/other-repo",  # Override default repo
        branch="feature",  # Override default branch
        payload={
            "path": "path/to/file.py",
            "original": "def old_function():",
            "replacement": "def new_function():",
            "message": "Renamed function for clarity"
        }
    )
    ```
    """

    def __init__(
        self,
        github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"),
        repo: Optional[str] = None,
        branch: str = "main",
        raise_on_failure: bool = True
    ):
        """
        Initialize the component.

        :param github_token: GitHub personal access token for API authentication
        :param repo: Default repository in owner/repo format
        :param branch: Default branch to work with
        :param raise_on_failure: If True, raises exceptions on API errors
        :raises TypeError: If ``github_token`` is not a ``Secret``
        """
        if not isinstance(github_token, Secret):
            raise TypeError("github_token must be a Secret")

        self.github_token = github_token
        self.default_repo = repo
        self.default_branch = branch
        self.raise_on_failure = raise_on_failure

        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "Authorization": f"Bearer {self.github_token.resolve_value()}",
            "User-Agent": "Haystack/GithubFileEditor"
        }

    @staticmethod
    def _missing_fields(payload: Dict[str, Any], required: Sequence[str]) -> Optional[str]:
        """Return an error string naming absent payload keys, or None when all are present."""
        missing = [key for key in required if key not in payload]
        if missing:
            return f"Error: Missing required payload field(s): {', '.join(missing)}"
        return None

    def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]:
        """
        Get file content and SHA from GitHub.

        :return: Tuple of (decoded UTF-8 content, blob SHA)
        :raises requests.RequestException: If the API request fails
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
        response = requests.get(url, headers=self.headers, params={"ref": branch})
        response.raise_for_status()
        data = response.json()
        content = b64decode(data["content"]).decode("utf-8")
        return content, data["sha"]

    def _update_file(
        self,
        owner: str,
        repo: str,
        path: str,
        content: str,
        message: str,
        sha: str,
        branch: str
    ) -> bool:
        """
        Update file content on GitHub.

        :return: True when the API accepted the update
        :raises requests.RequestException: If the API request fails
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
        payload = {
            "message": message,
            "content": b64encode(content.encode("utf-8")).decode("utf-8"),
            "sha": sha,
            "branch": branch
        }
        response = requests.put(url, headers=self.headers, json=payload)
        response.raise_for_status()
        return True

    def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool:
        """
        Check if last commit was made by the current token user.

        Returns False when the branch has no commits or when the API reports
        no author account for the latest commit.
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/commits"
        response = requests.get(url, headers=self.headers, params={"per_page": 1, "sha": branch})
        response.raise_for_status()
        commits = response.json()
        if not commits:
            return False

        # "author" is null in the API response when no GitHub account is attached.
        commit_author = (commits[0].get("author") or {}).get("login")
        if commit_author is None:
            return False

        # Get current user
        user_response = requests.get("https://api.github.com/user", headers=self.headers)
        user_response.raise_for_status()
        current_user = user_response.json()["login"]

        return commit_author == current_user

    def _edit_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
        """
        Handle file editing.

        Replaces ``payload["original"]`` with ``payload["replacement"]`` in the file at
        ``payload["path"]`` and commits with ``payload["message"]``. The original
        string must occur exactly once in the file.
        """
        problem = self._missing_fields(payload, ("path", "original", "replacement", "message"))
        if problem:
            return problem

        try:
            content, sha = self._get_file_content(owner, repo, payload["path"], branch)

            # Check if original string is unique
            occurrences = content.count(payload["original"])
            if occurrences == 0:
                return "Error: Original string not found in file"
            if occurrences > 1:
                return "Error: Original string appears multiple times. Please provide more context"

            # Perform the replacement
            new_content = content.replace(payload["original"], payload["replacement"], 1)
            success = self._update_file(
                owner, repo, payload["path"], new_content, payload["message"], sha, branch
            )
            return "Edit successful" if success else "Edit failed"

        except requests.RequestException as e:
            if self.raise_on_failure:
                raise
            return f"Error: {str(e)}"

    def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
        """
        Handle undoing changes.

        Force-moves the branch reference back by one commit, but only when the
        latest commit was authored by the token's user. ``payload`` is accepted
        for handler-signature parity and is not read.
        """
        try:
            if not self._check_last_commit(owner, repo, branch):
                return "Error: Last commit was not made by the current user"

            # Get the previous commit SHA
            commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits"
            commits_response = requests.get(
                commits_url,
                headers=self.headers,
                params={"per_page": 2, "sha": branch}
            )
            commits_response.raise_for_status()
            commits = commits_response.json()
            if len(commits) < 2:
                return "Error: No previous commit to revert to"
            previous_sha = commits[1]["sha"]

            # Update branch reference to previous commit
            ref_url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch}"
            response = requests.patch(
                ref_url, headers=self.headers, json={"sha": previous_sha, "force": True}
            )
            response.raise_for_status()

            return "Successfully undid last change"

        except requests.RequestException as e:
            if self.raise_on_failure:
                raise
            return f"Error: {str(e)}"

    def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
        """Handle file creation from ``payload["path"]``, ``payload["content"]`` and ``payload["message"]``."""
        problem = self._missing_fields(payload, ("path", "content", "message"))
        if problem:
            return problem

        try:
            url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}"
            content = b64encode(payload["content"].encode("utf-8")).decode("utf-8")

            data = {
                "message": payload["message"],
                "content": content,
                "branch": branch
            }

            response = requests.put(url, headers=self.headers, json=data)
            response.raise_for_status()
            return "File created successfully"

        except requests.RequestException as e:
            if self.raise_on_failure:
                raise
            return f"Error: {str(e)}"

    def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str:
        """Handle file deletion of ``payload["path"]`` with commit message ``payload["message"]``."""
        problem = self._missing_fields(payload, ("path", "message"))
        if problem:
            return problem

        try:
            # Only the blob SHA is needed; it is sent along with the delete request.
            _, sha = self._get_file_content(owner, repo, payload["path"], branch)
            url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}"

            data = {
                "message": payload["message"],
                "sha": sha,
                "branch": branch
            }

            response = requests.delete(url, headers=self.headers, json=data)
            response.raise_for_status()
            return "File deleted successfully"

        except requests.RequestException as e:
            if self.raise_on_failure:
                raise
            return f"Error: {str(e)}"

    @component.output_types(result=str)
    def run(
        self,
        command: Union[Command, str],
        payload: Dict[str, Any],
        repo: Optional[str] = None,
        branch: Optional[str] = None
    ) -> Dict[str, str]:
        """
        Process GitHub file operations.

        :param command: Operation to perform ("edit", "undo", "create", "delete")
        :param payload: Dictionary containing command-specific parameters
        :param repo: Repository in owner/repo format (overrides default if provided)
        :param branch: Branch to perform operations on (overrides default if provided)
        :return: Dictionary containing operation result
        """
        if repo is None:
            if self.default_repo is None:
                return {
                    "result": "Error: No repository specified. Either provide it in initialization or in run() method"
                }
            repo = self.default_repo

        working_branch = branch if branch is not None else self.default_branch

        # Require exactly "owner/repo"; anything else gets an error result instead of an unpacking crash.
        owner, separator, repo_name = repo.partition("/")
        if not (owner and separator and repo_name) or "/" in repo_name:
            return {"result": f"Error: Invalid repository format. Expected 'owner/repo', got '{repo}'"}

        try:
            parsed_command = Command(command)
        except ValueError:
            return {"result": f"Error: Unknown command '{command}'"}

        command_handlers = {
            Command.EDIT: self._edit_file,
            Command.UNDO: self._undo_changes,
            Command.CREATE: self._create_file,
            Command.DELETE: self._delete_file
        }

        result = command_handlers[parsed_command](owner, repo_name, payload, working_branch)
        return {"result": result}

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the component to a dictionary."""
        return default_to_dict(
            self,
            github_token=self.github_token.to_dict() if self.github_token else None,
            repo=self.default_repo,
            branch=self.default_branch,
            raise_on_failure=self.raise_on_failure
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GithubFileEditor":
        """Deserialize the component from a dictionary."""
        init_params = data["init_parameters"]
        deserialize_secrets_inplace(init_params, keys=["github_token"])
        return default_from_dict(cls, data)
re +from typing import Any, Dict, Optional + +import requests +from haystack import component, default_from_dict, default_to_dict, logging +from haystack.utils import deserialize_secrets_inplace +from haystack.utils.auth import Secret + +logger = logging.getLogger(__name__) + + +@component +class GithubIssueCommenter: + """ + Posts comments to GitHub issues. + + The component takes a GitHub issue URL and comment text, then posts the comment + to the specified issue using the GitHub API. + + ### Usage example + ```python + from haystack.components.writers import GithubIssueCommenter + + commenter = GithubIssueCommenter(github_token=Secret.from_env_var("GITHUB_TOKEN")) + result = commenter.run( + url="https://github.com/owner/repo/issues/123", + comment="Thanks for reporting this issue! We'll look into it." + ) + + assert result["success"] is True + ``` + """ + + def __init__( + self, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + retry_attempts: int = 2, + ): + """ + Initialize the component. + + :param github_token: GitHub personal access token for API authentication as a Secret + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + """ + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.retry_attempts = retry_attempts + + # Set base headers during initialization + self.headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubIssueCommenter", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. 
+ + :return: Dictionary of headers including authorization if token is present + """ + headers = self.headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def _parse_github_url(self, url: str) -> tuple[str, str, int]: + """ + Parse GitHub URL into owner, repo and issue number. + + :param url: GitHub issue URL + :return: Tuple of (owner, repo, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, url) + if not match: + raise ValueError(f"Invalid GitHub issue URL format: {url}") + + owner, repo, issue_number = match.groups() + return owner, repo, int(issue_number) + + def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) -> bool: + """ + Post a comment to a GitHub issue. + + :param owner: Repository owner + :param repo: Repository name + :param issue_number: Issue number + :param comment: Comment text to post + :return: True if comment was posted successfully + :raises requests.exceptions.RequestException: If the API request fails + """ + url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/comments" + data = {"body": comment} + + for attempt in range(self.retry_attempts): + try: + response = requests.post(url, headers=self._get_request_headers(), json=data) + response.raise_for_status() + return True + except requests.exceptions.RequestException as e: + if attempt == self.retry_attempts - 1: + raise + logger.warning(f"Attempt {attempt + 1} failed: {str(e)}. Retrying...") + + return False + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the component to a dictionary. + + :returns: Dictionary with serialized data. 
import re
from typing import Any, Dict, List, Optional

import requests
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.utils import deserialize_secrets_inplace
from haystack.utils.auth import Secret

logger = logging.getLogger(__name__)


@component
class GithubIssueViewer:
    """
    Fetches and parses GitHub issues into Haystack documents.

    The component takes a GitHub issue URL and returns a list of documents where:
    - First document contains the main issue content
    - Subsequent documents contain the issue comments

    ### Usage example
    ```python
    from github_haystack.github_components.issue_viewer import GithubIssueViewer
    from haystack.utils import Secret

    viewer = GithubIssueViewer(github_token=Secret.from_env_var("GITHUB_TOKEN"))
    docs = viewer.run(
        url="https://github.com/owner/repo/issues/123"
    )["documents"]

    assert len(docs) >= 1  # At least the main issue
    assert docs[0].meta["type"] == "issue"
    ```
    """

    # Page size requested when listing comments.
    _COMMENTS_PER_PAGE = 100

    def __init__(
        self,
        github_token: Optional[Secret] = None,
        raise_on_failure: bool = True,
        retry_attempts: int = 2,
    ):
        """
        Initialize the component.

        :param github_token: GitHub personal access token for API authentication as a Secret
        :param raise_on_failure: If True, raises exceptions on API errors
        :param retry_attempts: Number of attempts made per API request before giving up
        """
        self.github_token = github_token
        self.raise_on_failure = raise_on_failure
        self.retry_attempts = retry_attempts

        # Only set the basic headers during initialization
        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Haystack/GithubIssueViewer",
        }

    def _get_request_headers(self) -> dict:
        """
        Get headers with resolved token for the request.

        :return: Dictionary of headers including authorization if token is present
        """
        headers = self.headers.copy()
        if self.github_token:
            headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}"
        return headers

    def _get_json(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """
        GET ``url`` and return the decoded JSON body, retrying failed requests.

        At least one attempt is always made, even if ``retry_attempts`` is zero or negative.

        :raises requests.exceptions.RequestException: If the final attempt fails
        """
        attempts = max(1, self.retry_attempts)
        for attempt in range(attempts):
            try:
                response = requests.get(url, headers=self._get_request_headers(), params=params)
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException as e:
                if attempt == attempts - 1:
                    raise
                logger.warning(
                    "Attempt {attempt} failed: {error}. Retrying...",
                    attempt=attempt + 1,
                    error=str(e),
                )
        return None  # Not reached: the loop either returns or raises.

    def _parse_github_url(self, url: str) -> tuple[str, str, int]:
        """
        Parse GitHub URL into owner, repo and issue number.

        :param url: GitHub issue URL
        :return: Tuple of (owner, repo, issue_number)
        :raises ValueError: If URL format is invalid
        """
        pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)"
        match = re.match(pattern, url)
        if not match:
            raise ValueError(f"Invalid GitHub issue URL format: {url}")

        owner, repo, issue_number = match.groups()
        return owner, repo, int(issue_number)

    def _fetch_issue(self, owner: str, repo: str, issue_number: int) -> Any:
        """
        Fetch issue data from GitHub API.

        :param owner: Repository owner
        :param repo: Repository name
        :param issue_number: Issue number
        :return: Issue data dictionary
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}"
        return self._get_json(url)

    def _fetch_comments(self, comments_url: str) -> Any:
        """
        Fetch all issue comments from GitHub API, following pagination.

        A single request only returns one page, so pages are requested until a
        short or empty page signals the end.

        :param comments_url: URL for issue comments
        :return: List of comment dictionaries
        """
        comments: List[Any] = []
        page = 1
        while True:
            batch = self._get_json(
                comments_url,
                params={"per_page": self._COMMENTS_PER_PAGE, "page": page},
            )
            if not batch:
                break
            comments.extend(batch)
            if len(batch) < self._COMMENTS_PER_PAGE:
                break
            page += 1
        return comments

    def _create_issue_document(self, issue_data: dict) -> Document:
        """
        Create a Document from issue data.

        :param issue_data: Issue data from GitHub API
        :return: Haystack Document
        """
        return Document(  # type: ignore
            content=issue_data["body"],
            meta={
                "type": "issue",
                "title": issue_data["title"],
                "number": issue_data["number"],
                "state": issue_data["state"],
                "created_at": issue_data["created_at"],
                "updated_at": issue_data["updated_at"],
                "author": issue_data["user"]["login"],
                "url": issue_data["html_url"],
            },
        )

    def _create_comment_document(
        self, comment_data: dict, issue_number: int
    ) -> Document:
        """
        Create a Document from comment data.

        :param comment_data: Comment data from GitHub API
        :param issue_number: Parent issue number
        :return: Haystack Document
        """
        return Document(
            content=comment_data["body"],
            meta={
                "type": "comment",
                "issue_number": issue_number,
                "created_at": comment_data["created_at"],
                "updated_at": comment_data["updated_at"],
                "author": comment_data["user"]["login"],
                "url": comment_data["html_url"],
            },
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the component to a dictionary.

        :returns: Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            github_token=self.github_token.to_dict() if self.github_token else None,
            raise_on_failure=self.raise_on_failure,
            retry_attempts=self.retry_attempts,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GithubIssueViewer":
        """
        Deserialize the component from a dictionary.

        :param data: Dictionary to deserialize from.
        :returns: Deserialized component.
        """
        init_params = data["init_parameters"]
        deserialize_secrets_inplace(init_params, keys=["github_token"])
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(self, url: str) -> dict:
        """
        Process a GitHub issue URL and return documents.

        When ``raise_on_failure`` is False, failures are reported as a single
        document whose meta has ``type == "error"``.

        :param url: GitHub issue URL
        :return: Dictionary containing list of documents
        """
        try:
            owner, repo, issue_number = self._parse_github_url(url)

            # Fetch issue data
            issue_data = self._fetch_issue(owner, repo, issue_number)
            documents = [self._create_issue_document(issue_data)]

            # Fetch and process comments if they exist
            if issue_data["comments"] > 0:
                comments = self._fetch_comments(issue_data["comments_url"])
                documents.extend(
                    self._create_comment_document(comment, issue_number)
                    for comment in comments
                )

            return {"documents": documents}

        except Exception as e:
            if self.raise_on_failure:
                raise

            error_message = f"Error processing GitHub issue {url}: {str(e)}"
            logger.warning(
                "Error processing GitHub issue {url}: {error}",
                url=url,
                error=str(e),
            )
            error_doc = Document(
                content=error_message,
                meta={
                    "error": True,
                    "type": "error",
                    "url": url,
                }
            )
            return {"documents": [error_doc]}
+ + ### Usage example + ```python + from haystack.components.actions import GithubPRCreator + from haystack.utils import Secret + + pr_creator = GithubPRCreator( + github_token=Secret.from_env_var("GITHUB_TOKEN") # Token from the fork owner + ) + + # Create a PR from your fork + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/123", + title="Fix issue #123", + body="This PR addresses issue #123", + branch="feature-branch", # The branch in your fork with the changes + base="main" # The branch in the original repo to merge into + ) + ``` + """ + + def __init__( + self, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True + ): + """ + Initialize the component. + + :param github_token: GitHub personal access token for authentication (from the fork owner) + :param raise_on_failure: If True, raises exceptions on API errors + """ + if not isinstance(github_token, Secret): + raise TypeError("github_token must be a Secret") + + self.github_token = github_token + self.raise_on_failure = raise_on_failure + + def _get_headers(self) -> Dict[str, str]: + """ + Get headers for GitHub API requests with resolved token. + + :return: Dictionary of request headers + """ + return { + "Accept": "application/vnd.github.v3+json", + "Authorization": f"Bearer {self.github_token.resolve_value()}", + "User-Agent": "Haystack/GithubPRCreator" + } + + def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: + """ + Parse owner, repo name, and issue number from GitHub issue URL. 
+ + :param issue_url: Full GitHub issue URL + :return: Tuple of (owner, repo_name, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, issue_url) + if not match: + raise ValueError("Invalid GitHub issue URL format") + return match.group(1), match.group(2), match.group(3) + + def _get_authenticated_user(self) -> str: + """Get the username of the authenticated user (fork owner).""" + response = requests.get( + "https://api.github.com/user", + headers=self._get_headers() + ) + response.raise_for_status() + return response.json()["login"] + + def _check_fork_exists(self, owner: str, repo: str, fork_owner: str) -> bool: + """Check if the fork exists.""" + url = f"https://api.github.com/repos/{fork_owner}/{repo}" + try: + response = requests.get(url, headers=self._get_headers()) + response.raise_for_status() + fork_data = response.json() + return fork_data.get("fork", False) + except requests.RequestException: + return False + + @component.output_types(result=str) + def run( + self, + issue_url: str, + title: str, + branch: str, + base: str, + body: str = "", + draft: bool = False + ) -> Dict[str, str]: + """ + Create a new pull request from your fork to the original repository, linked to the specified issue. 
+ + :param issue_url: URL of the GitHub issue to link the PR to + :param title: Title of the pull request + :param branch: Name of the branch in your fork where changes are implemented + :param base: Name of the branch in the original repo you want to merge into + :param body: Additional content for the pull request description + :param draft: Whether to create a draft pull request + :return: Dictionary containing operation result + """ + try: + # Parse repository information from issue URL + owner, repo_name, issue_number = self._parse_issue_url(issue_url) + + # Get the authenticated user (fork owner) + fork_owner = self._get_authenticated_user() + + # Check if the fork exists + if not self._check_fork_exists(owner, repo_name, fork_owner): + return {"result": f"Error: Fork not found at {fork_owner}/{repo_name}"} + + url = f"https://api.github.com/repos/{owner}/{repo_name}/pulls" + + # For cross-repository PRs, head must be in the format username:branch + head = f"{fork_owner}:{branch}" + + pr_data = { + "title": title, + "body": body, + "head": head, + "base": base, + "draft": draft, + "maintainer_can_modify": True, # Allow maintainers to modify the PR + } + + response = requests.post(url, headers=self._get_headers(), json=pr_data) + response.raise_for_status() + pr_number = response.json()["number"] + + return {"result": f"Pull request #{pr_number} created successfully and linked to issue #{issue_number}"} + + except (requests.RequestException, ValueError) as e: + if self.raise_on_failure: + raise + return {"result": f"Error: {str(e)}"} + + def to_dict(self) -> Dict[str, Any]: + """Serialize the component to a dictionary.""" + return default_to_dict( + self, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GithubPRCreator": + """Deserialize the component from a dictionary.""" + init_params = data["init_parameters"] + 
import base64
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Union

import requests
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.utils import Secret, deserialize_secrets_inplace

logger = logging.getLogger(__name__)


@dataclass
class GitHubItem:
    """Represents an item (file or directory) in a GitHub repository"""

    name: str
    type: str  # "file" or "dir"
    path: str
    size: int
    url: str
    content: Optional[str] = None


@component
class GithubRepositoryViewer:
    """
    Navigates and fetches content from GitHub repositories.

    For directories:
    - Returns a list of Documents, one for each item
    - Each Document's content is the item name
    - Full path and metadata in Document.meta

    For files:
    - Returns a single Document
    - Document's content is the file content
    - Full path and metadata in Document.meta

    For errors:
    - Returns a single Document
    - Document's content is the error message
    - Document's meta contains type="error"

    ### Usage example
    ```python
    from github_haystack.github_components.repo_viewer import GithubRepositoryViewer
    from haystack.utils import Secret

    # Using token directly
    viewer = GithubRepositoryViewer(github_token=Secret.from_token("your_token"))

    # Using environment variable
    viewer = GithubRepositoryViewer(github_token=Secret.from_env_var("GITHUB_TOKEN"))

    # List directory contents - returns multiple documents
    result = viewer.run(
        repo="owner/repository",
        path="docs/",
        branch="main"
    )

    # Get specific file - returns single document
    result = viewer.run(
        repo="owner/repository",
        path="README.md",
        branch="main"
    )
    ```
    """

    def __init__(
        self,
        github_token: Optional[Secret] = None,
        raise_on_failure: bool = True,
        max_file_size: int = 1_000_000,  # 1MB default limit
        repo: Optional[str] = None,
        branch: Optional[str] = None
    ):
        """
        Initialize the component.

        :param github_token: GitHub personal access token for API authentication
        :param raise_on_failure: If True, raises exceptions on API errors
        :param max_file_size: Maximum file size in bytes to fetch (default: 1MB)
        :param repo: Default repository in "owner/repo" format, used when run() gets none
        :param branch: Default git reference, used when run() gets none
        :raises TypeError: If ``github_token`` is given but is not a ``Secret``
        """
        if github_token is not None and not isinstance(github_token, Secret):
            raise TypeError("github_token must be a Secret")

        self.github_token = github_token
        self.raise_on_failure = raise_on_failure
        self.max_file_size = max_file_size
        self.repo = repo
        self.branch = branch

        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Haystack/GithubRepositoryViewer",
        }

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the component to a dictionary.

        Uses ``default_to_dict`` like the sibling GitHub components and
        includes every init parameter, so defaults for ``repo`` and ``branch``
        are not lost when the component is serialized.

        :returns: Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            github_token=self.github_token.to_dict() if self.github_token else None,
            raise_on_failure=self.raise_on_failure,
            max_file_size=self.max_file_size,
            repo=self.repo,
            branch=self.branch,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GithubRepositoryViewer":
        """
        Deserialize the component from a dictionary.

        Accepts both the ``init_parameters`` envelope produced by ``to_dict`` and
        the flat dictionary of init parameters that earlier versions emitted.

        :param data: Dictionary to deserialize from.
        :returns: Deserialized component.
        """
        if "init_parameters" in data:
            deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"])
            return default_from_dict(cls, data)

        # Legacy flat format: keys are the init parameters themselves.
        init_params = data.copy()
        if init_params.get("github_token"):
            init_params["github_token"] = Secret.from_dict(init_params["github_token"])
        return cls(**init_params)

    def _parse_repo(self, repo: str) -> tuple[str, str]:
        """
        Parse owner/repo string.

        :raises ValueError: If ``repo`` does not consist of exactly two "/"-separated parts
        """
        parts = repo.split("/")
        if len(parts) != 2:
            raise ValueError(
                f"Invalid repository format. Expected 'owner/repo', got '{repo}'"
            )
        return parts[0], parts[1]

    def _normalize_path(self, path: str) -> str:
        """Normalize repository path by stripping leading and trailing slashes"""
        return path.strip("/")

    def _fetch_contents(self, owner: str, repo: str, path: str, ref: Optional[str]) -> Any:
        """
        Fetch repository contents from GitHub API.

        :param ref: Git reference to read from; omitted from the request when falsy
        :return: Decoded JSON response
        :raises requests.RequestException: If the API request fails
        """
        url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
        # Pass ref as a query parameter so requests URL-encodes it.
        params = {"ref": ref} if ref else None

        headers = self.headers.copy()
        if self.github_token:
            headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}"

        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        return response.json()

    def _process_file_content(self, content: str, encoding: str) -> str:
        """Decode base64 content to UTF-8 text; other encodings are returned unchanged"""
        if encoding == "base64":
            return base64.b64decode(content).decode("utf-8")
        return content

    def _create_file_document(self, item: GitHubItem) -> Document:
        """Create a Document from a file, falling back to the file name when content is empty"""
        return Document(
            content=item.content if item.content else item.name,
            meta={
                "path": item.path,
                "type": "file_content",
                "size": item.size,
                "url": item.url,
            },
        )

    def _create_directory_documents(self, items: List[GitHubItem]) -> List[Document]:
        """Create a list of Documents from directory contents, directories first, then by name"""
        return [
            Document(
                content=item.name,
                meta={
                    "path": item.path,
                    "type": item.type,
                    "size": item.size,
                    "url": item.url,
                },
            )
            for item in sorted(items, key=lambda x: (x.type != "dir", x.name.lower()))
        ]

    def _create_error_document(self, error: Union[Exception, str], path: str) -> Document:
        """Create a Document from an error or an already formatted error message"""
        return Document(
            content=str(error),
            meta={
                "type": "error",
                "path": path,
            },
        )

    @component.output_types(documents=List[Document])
    def run(
        self, path: str, repo: Optional[str] = None, branch: Optional[str] = None
    ) -> Dict[str, List[Document]]:
        """
        Process a GitHub repository path and return documents.

        :param path: Path within repository
        :param repo: Repository in format "owner/repo" (falls back to the init default)
        :param branch: Git reference to read from (falls back to the init default)
        :return: Dictionary containing list of documents
        """
        if repo is None:
            repo = self.repo
        if branch is None:
            branch = self.branch

        try:
            if repo is None:
                raise ValueError(
                    "No repository specified. Either provide it in initialization or in run() method"
                )

            owner, repo_name = self._parse_repo(repo)
            normalized_path = self._normalize_path(path)

            contents = self._fetch_contents(owner, repo_name, normalized_path, branch)

            # Handle single file response
            if not isinstance(contents, list):
                if contents.get("size", 0) > self.max_file_size:
                    raise ValueError(
                        f"File size {contents['size']} exceeds limit of {self.max_file_size}"
                    )

                item = GitHubItem(
                    name=contents["name"],
                    type="file",
                    path=contents["path"],
                    size=contents["size"],
                    url=contents["html_url"],
                    content=self._process_file_content(
                        contents["content"], contents["encoding"]
                    ),
                )
                return {"documents": [self._create_file_document(item)]}

            # Handle directory listing
            items = [
                GitHubItem(
                    name=item["name"],
                    type="dir" if item["type"] == "dir" else "file",
                    path=item["path"],
                    size=item.get("size", 0),
                    url=item["html_url"],
                )
                for item in contents
            ]

            return {"documents": self._create_directory_documents(items)}

        except Exception as e:
            if self.raise_on_failure:
                raise
            logger.warning(
                "Error processing repository path {path}: {error}",
                path=path,
                error=str(e),
            )
            error_doc = self._create_error_document(
                f"Error processing repository path {path}: {str(e)}. Seems like the file does not exist.", path
            )
            return {"documents": [error_doc]}
+ + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param wait_for_completion: If True, waits until fork is fully created + :param max_wait_seconds: Maximum time to wait for fork completion in seconds + :param poll_interval: Time between status checks in seconds + :param auto_sync: If True, syncs fork with original repository if it already exists + :param create_branch: If True, creates a fix branch based on the issue number + """ + if not isinstance(github_token, Secret): + raise TypeError("github_token must be a Secret") + + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.wait_for_completion = wait_for_completion + self.max_wait_seconds = max_wait_seconds + self.poll_interval = poll_interval + self.auto_sync = auto_sync + self.create_branch = create_branch + + self.headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubRepoForker" + } + + def _parse_github_url(self, url: str) -> tuple[str, str, str]: + """ + Parse GitHub URL into owner, repo, and issue number. + + :param url: GitHub issue URL + :return: Tuple of (owner, repo, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, url) + if not match: + raise ValueError(f"Invalid GitHub issue URL format: {url}") + + owner, repo, issue_number = match.groups() + return owner, repo, issue_number + + def _check_fork_status(self, fork_path: str) -> bool: + """ + Check if a forked repository exists and is ready. 
+ + :param fork_path: Repository path in owner/repo format + :return: True if fork exists and is ready, False otherwise + """ + url = f"https://api.github.com/repos/{fork_path}" + try: + response = requests.get( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + ) + return response.status_code == 200 + except requests.RequestException: + return False + + def _get_authenticated_user(self) -> str: + """ + Get the authenticated user's username. + + :return: Username of the authenticated user + :raises requests.RequestException: If API call fails + """ + url = "https://api.github.com/user" + response = requests.get( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + ) + response.raise_for_status() + return response.json()["login"] + + def _get_existing_repository(self, repo_name: str) -> Optional[str]: + """ + Check if a repository with the given name already exists in the authenticated user's account. + + :param repo_name: Repository name to check + :return: Full repository name if it exists, None otherwise + """ + url = f"https://api.github.com/repos/{self._get_authenticated_user()}/{repo_name}" + try: + response = requests.get( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + ) + if response.status_code == 200: + return repo_name + return None + except requests.RequestException as e: + logger.warning(f"Failed to check repository existence: {str(e)}") + return None + + def _sync_fork(self, fork_path: str) -> None: + """ + Sync a fork with its upstream repository. 
+ + :param fork_path: Fork path in owner/repo format + :raises requests.RequestException: If sync fails + """ + url = f"https://api.github.com/repos/{fork_path}/merge-upstream" + response = requests.post( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + json={"branch": "main"} + ) + response.raise_for_status() + + def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: + """ + Create a new branch for the issue. + + :param fork_path: Fork path in owner/repo format + :param issue_number: Issue number to use in branch name + :raises requests.RequestException: If branch creation fails + """ + # First, get the default branch SHA + url = f"https://api.github.com/repos/{fork_path}" + response = requests.get( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + ) + response.raise_for_status() + default_branch = response.json()["default_branch"] + + # Get the SHA of the default branch + url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" + response = requests.get( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + ) + response.raise_for_status() + sha = response.json()["object"]["sha"] + + # Create the new branch + branch_name = f"fix-{issue_number}" + url = f"https://api.github.com/repos/{fork_path}/git/refs" + response = requests.post( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + json={ + "ref": f"refs/heads/{branch_name}", + "sha": sha + } + ) + response.raise_for_status() + + def _create_fork(self, owner: str, repo: str) -> str: + """ + Create a fork of the repository. 
+ + :param owner: Original repository owner + :param repo: Repository name + :return: Fork path in owner/repo format + :raises requests.RequestException: If fork creation fails + """ + url = f"https://api.github.com/repos/{owner}/{repo}/forks" + response = requests.post( + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + ) + response.raise_for_status() + + fork_data = response.json() + return f"{fork_data['owner']['login']}/{fork_data['name']}" + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the component to a dictionary. + + :returns: Dictionary with serialized data. + """ + return default_to_dict( + self, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + wait_for_completion=self.wait_for_completion, + max_wait_seconds=self.max_wait_seconds, + poll_interval=self.poll_interval, + auto_sync=self.auto_sync, + create_branch=self.create_branch, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GithubRepoForker": + """ + Deserialize the component from a dictionary. + + :param data: Dictionary to deserialize from. + :returns: Deserialized component. + """ + init_params = data["init_parameters"] + deserialize_secrets_inplace(init_params, keys=["github_token"]) + return default_from_dict(cls, data) + + @component.output_types(repo=str, issue_branch=str) + def run(self, url: str) -> dict: + """ + Process a GitHub issue URL and create or sync a fork of the repository. 
+ + :param url: GitHub issue URL + :return: Dictionary containing repository path in owner/repo format + """ + try: + # Extract repository information + owner, repo, issue_number = self._parse_github_url(url) + + # Check if fork already exists + user = self._get_authenticated_user() + existing_fork = self._get_existing_repository(repo) + + if existing_fork and self.auto_sync: + # If fork exists and auto_sync is enabled, sync with upstream + fork_path = f"{user}/{repo}" + logger.info("Fork already exists, syncing with upstream repository") + self._sync_fork(fork_path) + else: + # Create new fork + fork_path = self._create_fork(owner, repo) + + # Wait for fork completion if requested + if self.wait_for_completion: + import time + start_time = time.time() + + while time.time() - start_time < self.max_wait_seconds: + if self._check_fork_status(fork_path): + logger.info("Fork creation completed successfully") + break + logger.debug("Waiting for fork creation to complete...") + time.sleep(self.poll_interval) + else: + msg = f"Fork creation timed out after {self.max_wait_seconds} seconds" + if self.raise_on_failure: + raise TimeoutError(msg) + logger.warning(msg) + + # Create issue branch if enabled + issue_branch = None + if self.create_branch: + issue_branch = f"fix-{issue_number}" + logger.info(f"Creating branch for issue #{issue_number}") + self._create_issue_branch(fork_path, issue_number) + + return {"repo": fork_path, "issue_branch": issue_branch} + + except Exception as e: + if self.raise_on_failure: + raise + logger.warning("Error forking repository from {url}: {error}", url=url, error=str(e)) + return {"repo": "", "issue_branch": None} diff --git a/integrations/github_haystack/tests/__init__.py b/integrations/github_haystack/tests/__init__.py new file mode 100644 index 0000000000..d391382c6b --- /dev/null +++ b/integrations/github_haystack/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2025-present deepset GmbH +# +# SPDX-License-Identifier: 
Apache-2.0 From b99ad72571eea29684dedbaf349b52a9a87db7ca Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 10 Apr 2025 15:21:28 +0200 Subject: [PATCH 03/51] remove github-haystack --- integrations/github-haystack/LICENSE.txt | 73 ----- integrations/github-haystack/README.md | 21 -- integrations/github-haystack/pyproject.toml | 61 ---- .../src/github_haystack/__about__.py | 4 - .../src/github_haystack/__init__.py | 3 - .../github_haystack/agent_prompts/__init__.py | 8 - .../agent_prompts/comment_tool.py | 22 -- .../github_haystack/agent_prompts/context.py | 175 ---------- .../agent_prompts/file_editor_tool.py | 130 -------- .../agent_prompts/pr_system_prompt.py | 53 ---- .../agent_prompts/repo_viewer_tool.py | 78 ----- .../agent_prompts/system_prompt.py | 61 ---- .../github_components/file_editor.py | 299 ------------------ .../github_components/issue_commenter.py | 155 --------- .../github_components/issue_viewer.py | 218 ------------- .../github_components/pr_creator.py | 171 ---------- .../github_components/repo_viewer.py | 263 --------------- .../github_components/repository_forker.py | 298 ----------------- .../github-haystack/tests/__init__.py | 3 - 19 files changed, 2096 deletions(-) delete mode 100644 integrations/github-haystack/LICENSE.txt delete mode 100644 integrations/github-haystack/README.md delete mode 100644 integrations/github-haystack/pyproject.toml delete mode 100644 integrations/github-haystack/src/github_haystack/__about__.py delete mode 100644 integrations/github-haystack/src/github_haystack/__init__.py delete mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py delete mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py delete mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/context.py delete mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py delete mode 100644 
integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py delete mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py delete mode 100644 integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py delete mode 100644 integrations/github-haystack/src/github_haystack/github_components/file_editor.py delete mode 100644 integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py delete mode 100644 integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py delete mode 100644 integrations/github-haystack/src/github_haystack/github_components/pr_creator.py delete mode 100644 integrations/github-haystack/src/github_haystack/github_components/repo_viewer.py delete mode 100644 integrations/github-haystack/src/github_haystack/github_components/repository_forker.py delete mode 100644 integrations/github-haystack/tests/__init__.py diff --git a/integrations/github-haystack/LICENSE.txt b/integrations/github-haystack/LICENSE.txt deleted file mode 100644 index 137069b823..0000000000 --- a/integrations/github-haystack/LICENSE.txt +++ /dev/null @@ -1,73 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. - -"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: - - (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. - - You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - -To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
- -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/integrations/github-haystack/README.md b/integrations/github-haystack/README.md deleted file mode 100644 index a816d2c6d0..0000000000 --- a/integrations/github-haystack/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# github-haystack - -[![PyPI - Version](https://img.shields.io/pypi/v/github-haystack.svg)](https://pypi.org/project/github-haystack) -[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/github-haystack.svg)](https://pypi.org/project/github-haystack) - ------ - -## Table of Contents - -- [Installation](#installation) -- [License](#license) - -## Installation - -```console -pip install github-haystack -``` - -## License - -`github-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
diff --git a/integrations/github-haystack/pyproject.toml b/integrations/github-haystack/pyproject.toml deleted file mode 100644 index 539aa0c09a..0000000000 --- a/integrations/github-haystack/pyproject.toml +++ /dev/null @@ -1,61 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "github-haystack" -dynamic = ["version"] -description = 'Haystack components for interacting with GitHub repositories' -readme = "README.md" -requires-python = ">=3.8" -license = "Apache-2.0" -keywords = [] -authors = [ - { name = "deepset GmbH", email = "info@deepset.ai" }, -] -classifiers = [ - "Development Status :: 4 - Beta", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", -] -dependencies = [] - -[project.urls] -Documentation = "https://github.com/deepset GmbH/github-haystack#readme" -Issues = "https://github.com/deepset GmbH/github-haystack/issues" -Source = "https://github.com/deepset GmbH/github-haystack" - -[tool.hatch.version] -path = "src/github_haystack/__about__.py" - -[tool.hatch.envs.types] -extra-dependencies = [ - "mypy>=1.0.0", -] -[tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive {args:src/github_haystack tests}" - -[tool.coverage.run] -source_pkgs = ["github_haystack", "tests"] -branch = true -parallel = true -omit = [ - "src/github_haystack/__about__.py", -] - -[tool.coverage.paths] -github_haystack = ["src/github_haystack", "*/github-haystack/src/github_haystack"] -tests = ["tests", "*/github-haystack/tests"] - -[tool.coverage.report] -exclude_lines = [ - "no cov", - "if __name__ == .__main__.:", - "if TYPE_CHECKING:", -] diff --git 
a/integrations/github-haystack/src/github_haystack/__about__.py b/integrations/github-haystack/src/github_haystack/__about__.py deleted file mode 100644 index 348b27fe81..0000000000 --- a/integrations/github-haystack/src/github_haystack/__about__.py +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-FileCopyrightText: 2025-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -__version__ = "0.0.1" diff --git a/integrations/github-haystack/src/github_haystack/__init__.py b/integrations/github-haystack/src/github_haystack/__init__.py deleted file mode 100644 index d391382c6b..0000000000 --- a/integrations/github-haystack/src/github_haystack/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2025-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py b/integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py deleted file mode 100644 index 984d948c61..0000000000 --- a/integrations/github-haystack/src/github_haystack/agent_prompts/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-FileCopyrightText: 2022-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 - -from .repo_viewer_tool import repo_viewer_prompt, repo_viewer_schema -from .system_prompt import issue_prompt - -_all_ = ["issue_prompt", "repo_viewer_prompt", "repo_viewer_schema"] diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py b/integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py deleted file mode 100644 index 27baf46e5c..0000000000 --- a/integrations/github-haystack/src/github_haystack/agent_prompts/comment_tool.py +++ /dev/null @@ -1,22 +0,0 @@ -comment_prompt = """ -Haystack-Agent uses this tool to post a comment to a Github-issue discussion. - - -Pass a `comment` string to post a comment. - - -IMPORTANT -Haystack-Agent MUST pass "comment" to this tool. Otherwise, comment creation fails. 
-Haystack-Agent always passes the contents of the comment to the "comment" parameter when calling this tool. -""" - -comment_schema = { - "properties": { - "comment": { - "type": "string", - "description": "The contents of the comment that you want to create." - } - }, - "required": ["comment"], - "type": "object" -} diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/context.py b/integrations/github-haystack/src/github_haystack/agent_prompts/context.py deleted file mode 100644 index ff45fccde9..0000000000 --- a/integrations/github-haystack/src/github_haystack/agent_prompts/context.py +++ /dev/null @@ -1,175 +0,0 @@ -haystack_context_prompt = """ - -Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related -questions. -The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. -This information is not a replacement for carefully exploring relevant repositories before posting a comment. - -**Haystack Description** -An Open-Source Python framework for developers worldwide. -AI orchestration framework to build customizable, production-ready LLM applications. -Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. -With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or -conversational agent chatbots. - -**High-Level Architecture** -Haystack has two central abstractions: -- Components -- Pipelines - -A Component is a lightweight abstraction that gets inputs, performs an action and returns outputs. -Some example components: -- `OpenAIGenerator`: receives a prompt and generates replies to the prompt by calling an OpenAI-model -- `MetadataRouter`: routes documents to configurable outputs based on their metadata -- `BM25Retriever`: retrieves documents from a 'DocumentStore' based on the 'query'-input - -A component is lightweight. 
It is easy to implement custom components. Here is some information from the docs: - -Requirements - -Here are the requirements for all custom components: - -- `@component`: This decorator marks a class as a component, allowing it to be used in a pipeline. -- `run()`: This is a required method in every component. It accepts input arguments and returns a `dict`. The inputs can -either come from the pipeline when it’s executed, or from the output of another component when connected using -`connect()`. The `run()` method should be compatible with the input/output definitions declared for the component. -See an [Extended Example](#extended-example) below to check how it works. - -## Inputs and Outputs - -Next, define the inputs and outputs for your component. - -### Inputs - -You can choose between three input options: - -- `set_input_type`: This method defines or updates a single input socket for a component instance. It’s ideal for adding -or modifying a specific input at runtime without affecting others. Use this when you need to dynamically set or modify -a single input based on specific conditions. -- `set_input_types`: This method allows you to define multiple input sockets at once, replacing any existing inputs. -It’s useful when you know all the inputs the component will need and want to configure them in bulk. Use this when you -want to define multiple inputs during initialization. -- Declaring arguments directly in the `run()` method. Use this method when the component’s inputs are static and known -at the time of class definition. - -### Outputs - -You can choose between two output options: - -- `@component.output_types`: This decorator defines the output types and names at the time of class definition. The -output names and types must match the `dict` returned by the `run()` method. Use this when the output types are static -and known in advance. This decorator is cleaner and more readable for static components. 
-- `set_output_types`: This method defines or updates multiple output sockets for a component instance at runtime. -It’s useful when you need flexibility in configuring outputs dynamically. Use this when the output types need to be set -at runtime for greater flexibility. - -# Short Example - -Here is an example of a simple minimal component setup: - -```python -from haystack import component - -@component -class WelcomeTextGenerator: - ''' - A component generating personal welcome message and making it upper case - ''' - @component.output_types(welcome_text=str, note=str) - def run(self, name:str): - return {"welcome_text": f'Hello {name}, welcome to Haystack!'.upper(), "note": "welcome message is ready"} - -``` - -Here, the custom component `WelcomeTextGenerator` accepts one input: `name` string and returns two outputs: -`welcome_text` and `note`. - - ----------- - -**Pipelines** -The pipelines in Haystack 2.0 are directed multigraphs of different Haystack components and integrations. -They give you the freedom to connect these components in various ways. This means that the -pipeline doesn't need to be a continuous stream of information. With the flexibility of Haystack pipelines, -you can have simultaneous flows, standalone components, loops, and other types of connections. - -# Steps to Create a Pipeline Explained - -Once all your components are created and ready to be combined in a pipeline, there are four steps to make it work: - -1. Create the pipeline with `Pipeline()`. - This creates the Pipeline object. -2. Add components to the pipeline, one by one, with `.add_component(name, component)`. - This just adds components to the pipeline without connecting them yet. It's especially useful for loops as it allows - the smooth connection of the components in the next step because they all already exist in the pipeline. -3. Connect components with `.connect("producer_component.output_name", "consumer_component.input_name")`. 
- At this step, you explicitly connect one of the outputs of a component to one of the inputs of the next component. - This is also when the pipeline validates the connection without running the components. It makes the validation fast. -4. Run the pipeline with `.run({"component_1": {"mandatory_inputs": value}})`. - Finally, you run the Pipeline by specifying the first component in the pipeline and passing its mandatory inputs. - - Optionally, you can pass inputs to other components, for example: - `.run({"component_1": {"mandatory_inputs": value}, "component_2": {"inputs": value}})`. - -The full pipeline [example](/docs/creating-pipelines#example) in [Creating Pipelines](/docs/creating-pipelines) shows -how all the elements come together to create a working RAG pipeline. - -Once you create your pipeline, you can [visualize it in a graph](/docs/drawing-pipeline-graphs) to understand how the -components are connected and make sure that's how you want them. You can use Mermaid graphs to do that. - -# Validation - -Validation happens when you connect pipeline components with `.connect()`, but before running the components to make it -faster. The pipeline validates that: - -- The components exist in the pipeline. -- The components' outputs and inputs match and are explicitly indicated. For example, if a component produces two -outputs, when connecting it to another component, you must indicate which output connects to which input. -- The components' types match. -- For input types other than `Variadic`, checks if the input is already occupied by another connection. - -All of these checks produce detailed errors to help you quickly fix any issues identified. - -# Serialization - -Thanks to serialization, you can save and then load your pipelines. Serialization is converting a Haystack pipeline -into a format you can store on disk or send over the wire. It's particularly useful for: - -- Editing, storing, and sharing pipelines. 
-- Modifying existing pipelines in a format different than Python. - -Haystack pipelines delegate the serialization to its components, so serializing a pipeline simply means serializing -each component in the pipeline one after the other, along with their connections. The pipeline is serialized into a -dictionary format, which acts as an intermediate format that you can then convert into the final format you want. - -> 📘 Serialization formats -> -> Haystack 2.0 only supports YAML format at this time. We'll be rolling out more formats gradually. - -For serialization to be possible, components must support conversion from and to Python dictionaries. All Haystack -components have two methods that make them serializable: `from_dict` and `to_dict`. The `Pipeline` class, in turn, has -its own `from_dict` and `to_dict` methods that take care of serializing components and connections. - - ---------- - -**Haystack Repositories** - -1. "deepset-ai/haystack" - -Contains the core code for the Haystack framework and a few components. -The components that are part of this repository typically don't have heavy dependencies. - - -2. "deepset-ai/haystack-core-integrations" - -This is a mono-repo maintained by the deepset-Team that contains integrations for the Haystack framework. -Typically, an integration consists of one or more components. Some integrations only contain document stores. -Each integration is a standalone pypi-package but you can find all of them in the core integrations repo. - - -3. "deepset-ai/haystack-experimental" - -Contains experimental features for the Haystack framework. 
- -""" diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py b/integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py deleted file mode 100644 index 61ac77b0d5..0000000000 --- a/integrations/github-haystack/src/github_haystack/agent_prompts/file_editor_tool.py +++ /dev/null @@ -1,130 +0,0 @@ -file_editor_prompt = """ -Use the file editor to edit an existing file in the repository. - -You must provide a 'command' for the action that you want to perform: -- edit -- create -- delete -- undo - -The 'payload' contains your options for each command. - -**Command 'edit'** - -To edit a file, you need to provide: -1. The path to the file -2. The original code snippet from the file -3. Your replacement code -4. A commit message - -The code will only be replaced if it is unique in the file. Pass a minimum of 2 consecutive lines that should -be replaced. If the original is not unique, the editor will return an error. -Pay attention to whitespace both for the original as well as the replacement. - -The commit message should be short and communicate your intention. -Use the conventional commit style for your messages. - -Example: -{ - "command": "edit", - "payload": { - "path": "README.md", - "original": "This is a placeholder description!\\nIt should be updated.", - "replacement": "This project helps developers test AI applications.", - "message": "docs: README should mention project purpose." - } -} - - -**Command 'create'** - -To create a file, you need to provide: -1. The path for the new file -2. The content for the file -3. A commit message - -The commit message should be short and communicate your intention. -Use the conventional commit style for your messages. - -IMPORTANT: -You MUST ALWAYS provide 'content' when creating a new file. File creation with empty content does not work. 
- -Example: -{ - "command": "create", - "payload": { - "path": "CONTRIBUTING.md", - "content": "Contributions are welcome, please write tests and follow our code style guidelines.", - "message": "chore: minimal instructions for contributors" - } -} - - -**Command 'delete'** - -To delete a file, you need to provide: -1. The path to the file to delete -2. A commit message - -The commit message should be short and communicate your intention. -Use the conventional commit style for your messages. - -Example: -{ - "command": "delete", - "payload": { - "path": "tests/components/test_messaging", - "message": "chore: messaging feature was removed" - } -} - -**Command 'undo'** - -This is how to undo your latest change. - -Important notes: -- You can only undo your own changes -- You can only undo one change at a time -- You need to provide a message for the undo operation - -Example: -{ - "command": "undo", - "payload": { - "message": "revert: undo previous commit due to failing tests" - } -} -""" - -file_editor_schema = { - "type": "object", - "properties": { - "command": { - "type": "string", - "enum": ["edit", "create", "delete", "undo"], - "description": "The command to execute" - }, - "payload": { - "type": "object", - "required": ["message"], - "properties": { - "message": { - "type": "string" - }, - "content": { - "type": "string" - }, - "path": { - "type": "string" - }, - "original": { - "type": "string" - }, - "replacement": { - "type": "string" - } - } - } - }, - "required": ["command", "payload"] -} diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py b/integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py deleted file mode 100644 index a99c8ae100..0000000000 --- a/integrations/github-haystack/src/github_haystack/agent_prompts/pr_system_prompt.py +++ /dev/null @@ -1,53 +0,0 @@ -system_prompt = """ -The assistant is Haystack-Agent, created by deepset. 
-Haystack-Agent creates Pull Requests that resolve GitHub issues. - -Haystack-Agent receives a GitHub issue and all current comments. -Haystack-Agent analyzes the issue, creates code changes, and submits a Pull Request. - -**Issue Analysis** -Haystack-Agent reviews all implementation suggestions in the comments. -Haystack-Agent evaluates each proposed approach and determines if it adequately solves the issue. -Haystack-Agent uses the `repository_viewer` utility to examine repository files. -Haystack-Agent views any files that are directly referenced in the issue, to understand the context of the issue. -Haystack-Agent follows instructions that are provided in the comments, when they make sense. - -**Software Engineering** -Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. -Haystack-Agent finds the right level of abstraction and complexity. -When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and -documentation patterns that are already being used in the codebase. -Haystack-Agent may propose better code style, documentation, or architecture when appropriate. -Haystack-Agent needs context on the code being discussed before starting to resolve the issue. -Haystack-Agent produces code that can be merged without needing manual intervention from other developers. -Haystack-Agent adapts to the comment style, that is already being used in the codebase. -It avoids superfluous comments that point out the obvious. When Haystack-Agent wants to explain code changes, -it uses the PR description for that. - -**Thinking Process** -Haystack-Agent thinks thoroughly about each issue. -Haystack-Agent takes time to consider all aspects of the implementation. -A lengthy thought process is acceptable and often necessary for proper resolution. - - -Haystack-Agent notes down any thoughts and observations in the scratchpad, so that it can reference them later. 
- - -**Resolution Process** -Haystack-Agent follows these steps to resolve issues: - -1. Analyze the issue and comments, noting all proposed implementations -2. Explore the repository from the root (/) directory -3. Examine files referenced in the issue or comments -4. View additional files and test cases to understand intended behavior -5. Create initial test cases to validate the planned solution -6. Edit repository source code to resolve the issue -7. Update test cases to match code changes -8. Handle edge cases and ensure code matches repository style -9. Create a Pull Request using the `create_pr` utility - -**Pull Request Creation** -Haystack-Agent writes clear Pull Request descriptions. -Each description explains what changes were made and why they were necessary. -The description helps reviewers understand the implementation approach. -""" diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py b/integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py deleted file mode 100644 index df9cad616b..0000000000 --- a/integrations/github-haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py +++ /dev/null @@ -1,78 +0,0 @@ -repo_viewer_prompt = """ -Haystack-Agent uses this tool to browse GitHub repositories. -Haystack-Agent can view directories and files with this tool. - - -Pass a `repo` string for the repository that you want to view. -It is required to pass `repo` to use this tool. -The structure is "owner/repo-name". - -Pass a `path` string for the directory or file that you want to view. -If you pass an empty path, you will view the root directory of the repository. 
- -Examples: - -- {"repo": "pandas-dev/pandas", "path": ""} - - will show you the root of the pandas repository -- {"repo": "pandas-dev/pandas", "path": "pyproject.toml"} - - will show you the "pyproject.toml"-file of the pandas repository -- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert"} - - will show you the "albert"-directory in the transformers repository -- {"repo": "huggingface/transformers", "path": "src/transformers/models/albert/albert_modelling.py"} - - will show you the source code for the albert model in the transformers repository - - -Haystack-Agent uses the `github_repository_viewer` to view relevant code. -Haystack-Agent starts at the root of the repository. -Haystack-Agent navigates one level at a time using directory listings. -Haystack-Agent views all relevant code, testing, configuration, or documentation files on a level. -It never skips a directory level or guesses full paths. - -Haystack-Agent thinks deeply about the content of a repository. Before Haystack-Agent uses the tool, it reasons about -next steps: - - -- What am I looking for in this location? -- Why is this path potentially relevant? -- What specific files might help solve the issue? -- What patterns or implementations should I look for? - - -After viewing the contents of a file or directory, Haystack-Agent reflects on its observations before moving on: - -- What did I learn from these files? -- What else might be related? -- Where should I look next and why? - - -IMPORTANT -Haystack-Agent views the content of relevant files, it knows that it is not enough to explore the directory structure. -Haystack-Agent needs to read the code to understand it properly. -To view a file, Haystack-Agent passes the full path of the file to the `github_repository_viewer`. -Haystack-Agent never guesses a file or directory path. 
- -Haystack-Agent takes notes after viewing code: - -- extract important code snippets -- document key functions, classes or configurations -- note key architecture patterns -- relate findings to the original issue -- relate findings to other code that was already viewed -- note down file paths as a reference - -""" - -repo_viewer_schema = { - "properties": { - "repo": { - "type": "string", - "description": "The owner/repository_name that you want to view." - }, - "path": { - "type": "string", - "description": "Path to directory or file to view. Defaults to repository root.", - } - }, - "required": ["repo"], - "type": "object" -} diff --git a/integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py b/integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py deleted file mode 100644 index 14b612a08e..0000000000 --- a/integrations/github-haystack/src/github_haystack/agent_prompts/system_prompt.py +++ /dev/null @@ -1,61 +0,0 @@ -issue_prompt = """ -The assistant is Haystack-Agent, created by deepset. -Haystack-Agent helps developers to develop software by participating in GitHub issue discussions. - -Haystack-Agent receives a GitHub issue and all current comments. -Haystack-Agent participates in the discussion by: -- helping users find answers to their questions -- analyzing bug reports and proposing a fix when necessary -- analyzing feature requests and proposing an implementation -- being a sounding board in architecture discussions and proposing alternative solutions - -**Style** -Haystack-Agent uses Markdown formatting. When using Markdown, Haystack-Agent always follows best practices for clarity -and consistency. -It always uses a single space after hash symbols for headers (e.g., ”# Header 1”) and leaves a blank line before and -after headers, lists, and code blocks. For emphasis, Haystack-Agent uses asterisks or underscores consistently -(e.g., italic or bold). 
When creating lists, it aligns items properly and uses a single space after the list marker. -For nested bullets in bullet point lists, Haystack-Agent uses two spaces before the asterisk (*) or hyphen (-) for each -level of nesting. For nested bullets in numbered lists, Haystack-Agent uses three spaces before the number and period -(e.g., “1.”) for each level of nesting. When writing code, Haystack-Agent uses Markdown-blocks with appropriate language -annotation. - -**Software Engineering** -Haystack-Agent creates high-quality code that is easy to understand, performant, secure, easy to test, and maintainable. -Haystack-Agent finds the right level of abstraction and complexity. -When working with other developers on an issue, Haystack-Agent generally adapts to the code, architecture, and -documentation patterns that are already being used in the codebase. -Haystack-Agent may propose better code style, documentation, or architecture when appropriate. -Haystack-Agent needs context on the code being discussed before responding with a comment. -Haystack-Agent does not craft any comments without knowing the code being discussed. -Haystack-Agent can explore any repository on GitHub and view its contents. - -**Exploring Repositories** -Haystack-Agent uses the `repository_viewer` to explore GitHub repositories before crafting a comment. -Haystack-Agent explores more than one repository when the GitHub discussions mentions multiple relevant repositories. - -**Thinking** -Haystack-Agent is a rigorous thinker. It uses -blocks to gather thoughts, reflect on the issue at -hand, and relate its learnings to it. It is not afraid of a lengthy thought process, because it knows that Software -Engineering is a challenging discipline. -Haystack-Agent takes notes on the . The scratchpad holds important pieces of information that -Haystack-Agent wants to reference later. - -**Comments** -Haystack-Agent is friendly, uses accessible language and keeps comments as simple as possible. 
-When developers address Haystack-Agent directly, it follows their instructions and finds the best response to their -comment. Haystack-Agent is happy to revise its code when a developer asks for it. -Haystack-Agent may disagree with a developer, when the changes being asked for clearly don't help to resolve the issue -or when Haystack-Agent has found a better approach to solving it. -Haystack-Agent uses the `create_comment`-tool to create a comment. Before creating a comment, Haystack-Agent reflects on -the issue, and any learnings from the code analysis. Haystack-Agent only responds when ready. - - -Haystack-Agent, this is IMPORTANT: -- DO NOT START WRITING YOUR RESPONSE UNTIL YOU HAVE COMPLETED THE ENTIRE EXPLORATION PHASE -- VIEWING DIRECTORY LISTINGS IS NOT ENOUGH - YOU MUST EXAMINE FILE CONTENTS -- If you find yourself running out of context space during exploration, say: "I need to continue exploring the codebase -before providing a complete response." Then continue exploration in the next interaction. - -Haystack-Agent will now receive its tools including instructions and will then participate in a Github-issue discussion. -""" diff --git a/integrations/github-haystack/src/github_haystack/github_components/file_editor.py b/integrations/github-haystack/src/github_haystack/github_components/file_editor.py deleted file mode 100644 index cfacfa213e..0000000000 --- a/integrations/github-haystack/src/github_haystack/github_components/file_editor.py +++ /dev/null @@ -1,299 +0,0 @@ -from base64 import b64decode, b64encode -from enum import StrEnum -from typing import Any, Dict, Optional, Union - -import requests -from haystack import component, default_from_dict, default_to_dict, logging -from haystack.utils import Secret, deserialize_secrets_inplace - -logger = logging.getLogger(__name__) - -class Command(StrEnum): - """ - Available commands for file operations in GitHub. 
- - Attributes: - EDIT: Edit an existing file by replacing content - UNDO: Revert the last commit if made by the same user - CREATE: Create a new file - DELETE: Delete an existing file - """ - EDIT = "edit" - UNDO = "undo" - CREATE = "create" - DELETE = "delete" - -@component -class GithubFileEditor: - """ - A Haystack component for editing files in GitHub repositories. - - Supports editing, undoing changes, deleting files, and creating new files - through the GitHub API. - - ### Usage example - ```python - from haystack.components.actions import GithubFileEditor - from haystack.utils import Secret - - # Initialize with default repo and branch - editor = GithubFileEditor( - github_token=Secret.from_env_var("GITHUB_TOKEN"), - repo="owner/repo", - branch="main" - ) - - # Edit a file using default repo and branch - result = editor.run( - command=Command.EDIT, - payload={ - "path": "path/to/file.py", - "original": "def old_function():", - "replacement": "def new_function():", - "message": "Renamed function for clarity" - } - ) - - # Edit a file in a different repo/branch - result = editor.run( - command=Command.EDIT, - repo="other-owner/other-repo", # Override default repo - branch="feature", # Override default branch - payload={ - "path": "path/to/file.py", - "original": "def old_function():", - "replacement": "def new_function():", - "message": "Renamed function for clarity" - } - ) - ``` - """ - - def __init__( - self, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - repo: Optional[str] = None, - branch: str = "main", - raise_on_failure: bool = True - ): - """ - Initialize the component. 
- - :param github_token: GitHub personal access token for API authentication - :param repo: Default repository in owner/repo format - :param branch: Default branch to work with - :param raise_on_failure: If True, raises exceptions on API errors - """ - if not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") - - self.github_token = github_token - self.default_repo = repo - self.default_branch = branch - self.raise_on_failure = raise_on_failure - - self.headers = { - "Accept": "application/vnd.github.v3+json", - "Authorization": f"Bearer {self.github_token.resolve_value()}", - "User-Agent": "Haystack/GithubFileEditor" - } - - def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]: - """Get file content and SHA from GitHub.""" - url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" - response = requests.get(url, headers=self.headers, params={"ref": branch}) - response.raise_for_status() - data = response.json() - content = b64decode(data["content"]).decode("utf-8") - return content, data["sha"] - - def _update_file( - self, - owner: str, - repo: str, - path: str, - content: str, - message: str, - sha: str, - branch: str - ) -> bool: - """Update file content on GitHub.""" - url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" - payload = { - "message": message, - "content": b64encode(content.encode("utf-8")).decode("utf-8"), - "sha": sha, - "branch": branch - } - response = requests.put(url, headers=self.headers, json=payload) - response.raise_for_status() - return True - - def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool: - """Check if last commit was made by the current token user.""" - url = f"https://api.github.com/repos/{owner}/{repo}/commits" - response = requests.get(url, headers=self.headers, params={"per_page": 1, "sha": branch}) - response.raise_for_status() - last_commit = response.json()[0] - commit_author = 
last_commit["author"]["login"] - - # Get current user - user_response = requests.get("https://api.github.com/user", headers=self.headers) - user_response.raise_for_status() - current_user = user_response.json()["login"] - - return commit_author == current_user - - def _edit_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: - """Handle file editing.""" - try: - content, sha = self._get_file_content(owner, repo, payload["path"], branch) - - # Check if original string is unique - occurrences = content.count(payload["original"]) - if occurrences == 0: - return "Error: Original string not found in file" - if occurrences > 1: - return "Error: Original string appears multiple times. Please provide more context" - - # Perform the replacement - new_content = content.replace(payload["original"], payload["replacement"]) - success = self._update_file( - owner, repo, payload["path"], new_content, payload["message"], sha, branch - ) - return "Edit successful" if success else "Edit failed" - - except requests.RequestException as e: - if self.raise_on_failure: - raise - return f"Error: {str(e)}" - - def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: - """Handle undoing changes.""" - try: - if not self._check_last_commit(owner, repo, branch): - return "Error: Last commit was not made by the current user" - - # Reset to previous commit - url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch}" - commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits" - - # Get the previous commit SHA - commits = requests.get( - commits_url, - headers=self.headers, - params={"per_page": 2, "sha": branch} - ).json() - previous_sha = commits[1]["sha"] - - # Update branch reference to previous commit - payload = {"sha": previous_sha, "force": True} - response = requests.patch(url, headers=self.headers, json=payload) - response.raise_for_status() - - return "Successfully undid last change" - - except 
requests.RequestException as e: - if self.raise_on_failure: - raise - return f"Error: {str(e)}" - - def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: - """Handle file creation.""" - try: - url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}" - content = b64encode(payload["content"].encode("utf-8")).decode("utf-8") - - data = { - "message": payload["message"], - "content": content, - "branch": branch - } - - response = requests.put(url, headers=self.headers, json=data) - response.raise_for_status() - return "File created successfully" - - except requests.RequestException as e: - if self.raise_on_failure: - raise - return f"Error: {str(e)}" - - def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: - """Handle file deletion.""" - try: - content, sha = self._get_file_content(owner, repo, payload["path"], branch) - url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}" - - data = { - "message": payload["message"], - "sha": sha, - "branch": branch - } - - response = requests.delete(url, headers=self.headers, json=data) - response.raise_for_status() - return "File deleted successfully" - - except requests.RequestException as e: - if self.raise_on_failure: - raise - return f"Error: {str(e)}" - - @component.output_types(result=str) - def run( - self, - command: Union[Command, str], - payload: Dict[str, Any], - repo: Optional[str] = None, - branch: Optional[str] = None - ) -> Dict[str, str]: - """ - Process GitHub file operations. 
- - :param command: Operation to perform ("edit", "undo", "create", "delete") - :param payload: Dictionary containing command-specific parameters - :param repo: Repository in owner/repo format (overrides default if provided) - :param branch: Branch to perform operations on (overrides default if provided) - :return: Dictionary containing operation result - """ - if repo is None: - if self.default_repo is None: - return { - "result": "Error: No repository specified. Either provide it in initialization or in run() method" - } - repo = self.default_repo - - working_branch = branch if branch is not None else self.default_branch - owner, repo_name = repo.split("/") - - command_handlers = { - Command.EDIT: self._edit_file, - Command.UNDO: self._undo_changes, - Command.CREATE: self._create_file, - Command.DELETE: self._delete_file - } - - if command not in command_handlers: - return {"result": f"Error: Unknown command '{command}'"} - - result = command_handlers[command](owner, repo_name, payload, working_branch) - return {"result": result} - - def to_dict(self) -> Dict[str, Any]: - """Serialize the component to a dictionary.""" - return default_to_dict( - self, - github_token=self.github_token.to_dict() if self.github_token else None, - repo=self.default_repo, - branch=self.default_branch, - raise_on_failure=self.raise_on_failure - ) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubFileEditor": - """Deserialize the component from a dictionary.""" - init_params = data["init_parameters"] - deserialize_secrets_inplace(init_params, keys=["github_token"]) - return default_from_dict(cls, data) - diff --git a/integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py b/integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py deleted file mode 100644 index f564eb4e7a..0000000000 --- a/integrations/github-haystack/src/github_haystack/github_components/issue_commenter.py +++ /dev/null @@ -1,155 +0,0 @@ 
-import re -from typing import Any, Dict, Optional - -import requests -from haystack import component, default_from_dict, default_to_dict, logging -from haystack.utils import deserialize_secrets_inplace -from haystack.utils.auth import Secret - -logger = logging.getLogger(__name__) - - -@component -class GithubIssueCommenter: - """ - Posts comments to GitHub issues. - - The component takes a GitHub issue URL and comment text, then posts the comment - to the specified issue using the GitHub API. - - ### Usage example - ```python - from haystack.components.writers import GithubIssueCommenter - - commenter = GithubIssueCommenter(github_token=Secret.from_env_var("GITHUB_TOKEN")) - result = commenter.run( - url="https://github.com/owner/repo/issues/123", - comment="Thanks for reporting this issue! We'll look into it." - ) - - assert result["success"] is True - ``` - """ - - def __init__( - self, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - raise_on_failure: bool = True, - retry_attempts: int = 2, - ): - """ - Initialize the component. - - :param github_token: GitHub personal access token for API authentication as a Secret - :param raise_on_failure: If True, raises exceptions on API errors - :param retry_attempts: Number of retry attempts for failed requests - """ - self.github_token = github_token - self.raise_on_failure = raise_on_failure - self.retry_attempts = retry_attempts - - # Set base headers during initialization - self.headers = { - "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubIssueCommenter", - } - - def _get_request_headers(self) -> dict: - """ - Get headers with resolved token for the request. 
- - :return: Dictionary of headers including authorization if token is present - """ - headers = self.headers.copy() - if self.github_token is not None: - headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" - return headers - - def _parse_github_url(self, url: str) -> tuple[str, str, int]: - """ - Parse GitHub URL into owner, repo and issue number. - - :param url: GitHub issue URL - :return: Tuple of (owner, repo, issue_number) - :raises ValueError: If URL format is invalid - """ - pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" - match = re.match(pattern, url) - if not match: - raise ValueError(f"Invalid GitHub issue URL format: {url}") - - owner, repo, issue_number = match.groups() - return owner, repo, int(issue_number) - - def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) -> bool: - """ - Post a comment to a GitHub issue. - - :param owner: Repository owner - :param repo: Repository name - :param issue_number: Issue number - :param comment: Comment text to post - :return: True if comment was posted successfully - :raises requests.exceptions.RequestException: If the API request fails - """ - url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/comments" - data = {"body": comment} - - for attempt in range(self.retry_attempts): - try: - response = requests.post(url, headers=self._get_request_headers(), json=data) - response.raise_for_status() - return True - except requests.exceptions.RequestException as e: - if attempt == self.retry_attempts - 1: - raise - logger.warning(f"Attempt {attempt + 1} failed: {str(e)}. Retrying...") - - return False - - def to_dict(self) -> Dict[str, Any]: - """ - Serialize the component to a dictionary. - - :returns: Dictionary with serialized data. 
- """ - return default_to_dict( - self, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure, - retry_attempts=self.retry_attempts, - ) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubIssueCommenter": - """ - Deserialize the component from a dictionary. - - :param data: Dictionary to deserialize from. - :returns: Deserialized component. - """ - init_params = data["init_parameters"] - deserialize_secrets_inplace(init_params, keys=["github_token"]) - return default_from_dict(cls, data) - - @component.output_types(success=bool) - def run(self, url: str, comment: str) -> dict: - """ - Post a comment to a GitHub issue. - - :param url: GitHub issue URL - :param comment: Comment text to post - :return: Dictionary containing success status - """ - try: - owner, repo, issue_number = self._parse_github_url(url) - success = self._post_comment(owner, repo, issue_number, comment) - return {"success": success} - - except Exception as e: - if self.raise_on_failure: - raise - - error_message = f"Error posting comment to GitHub issue {url}: {str(e)}" - logger.warning(error_message) - return {"success": False} diff --git a/integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py b/integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py deleted file mode 100644 index 89fdaea771..0000000000 --- a/integrations/github-haystack/src/github_haystack/github_components/issue_viewer.py +++ /dev/null @@ -1,218 +0,0 @@ -import re -from typing import Any, Dict, List, Optional - -import requests -from haystack import Document, component, default_from_dict, default_to_dict, logging -from haystack.utils import deserialize_secrets_inplace -from haystack.utils.auth import Secret - -logger = logging.getLogger(__name__) - - -@component -class GithubIssueViewer: - """ - Fetches and parses GitHub issues into Haystack documents. 
- - The component takes a GitHub issue URL and returns a list of documents where: - - First document contains the main issue content - - Subsequent documents contain the issue comments - - ### Usage example - ```python - from haystack.components.fetchers import GithubIssueViewer - - viewer = GithubIssueViewer(github_token=Secret.from_env_var("GITHUB_TOKEN")) - docs = viewer.run( - url="https://github.com/owner/repo/issues/123" - )["documents"] - - assert len(docs) >= 1 # At least the main issue - assert docs[0].meta["type"] == "issue" - ``` - """ - - def __init__( - self, - github_token: Optional[Secret] = None, - raise_on_failure: bool = True, - retry_attempts: int = 2, - ): - """ - Initialize the component. - - :param github_token: GitHub personal access token for API authentication as a Secret - :param raise_on_failure: If True, raises exceptions on API errors - :param retry_attempts: Number of retry attempts for failed requests - """ - self.github_token = github_token - self.raise_on_failure = raise_on_failure - self.retry_attempts = retry_attempts - - # Only set the basic headers during initialization - self.headers = { - "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubIssueViewer", - } - - def _get_request_headers(self) -> dict: - """ - Get headers with resolved token for the request. - - :return: Dictionary of headers including authorization if token is present - """ - headers = self.headers.copy() - if self.github_token: - headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" - return headers - - def _parse_github_url(self, url: str) -> tuple[str, str, int]: - """ - Parse GitHub URL into owner, repo and issue number. 
- - :param url: GitHub issue URL - :return: Tuple of (owner, repo, issue_number) - :raises ValueError: If URL format is invalid - """ - pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" - match = re.match(pattern, url) - if not match: - raise ValueError(f"Invalid GitHub issue URL format: {url}") - - owner, repo, issue_number = match.groups() - return owner, repo, int(issue_number) - - def _fetch_issue(self, owner: str, repo: str, issue_number: int) -> Any: - """ - Fetch issue data from GitHub API. - - :param owner: Repository owner - :param repo: Repository name - :param issue_number: Issue number - :return: Issue data dictionary - """ - url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}" - response = requests.get(url, headers=self._get_request_headers()) - response.raise_for_status() - return response.json() - - def _fetch_comments(self, comments_url: str) -> Any: - """ - Fetch issue comments from GitHub API. - - :param comments_url: URL for issue comments - :return: List of comment dictionaries - """ - response = requests.get(comments_url, headers=self._get_request_headers()) - response.raise_for_status() - return response.json() - - def _create_issue_document(self, issue_data: dict) -> Document: - """ - Create a Document from issue data. - - :param issue_data: Issue data from GitHub API - :return: Haystack Document - """ - return Document( # type: ignore - content=issue_data["body"], - meta={ - "type": "issue", - "title": issue_data["title"], - "number": issue_data["number"], - "state": issue_data["state"], - "created_at": issue_data["created_at"], - "updated_at": issue_data["updated_at"], - "author": issue_data["user"]["login"], - "url": issue_data["html_url"], - }, - ) - - def _create_comment_document( - self, comment_data: dict, issue_number: int - ) -> Document: - """ - Create a Document from comment data. 
- - :param comment_data: Comment data from GitHub API - :param issue_number: Parent issue number - :return: Haystack Document - """ - return Document( - content=comment_data["body"], - meta={ - "type": "comment", - "issue_number": issue_number, - "created_at": comment_data["created_at"], - "updated_at": comment_data["updated_at"], - "author": comment_data["user"]["login"], - "url": comment_data["html_url"], - }, - ) - - def to_dict(self) -> Dict[str, Any]: - """ - Serialize the component to a dictionary. - - :returns: Dictionary with serialized data. - """ - return default_to_dict( - self, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure, - retry_attempts=self.retry_attempts, - ) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubIssueViewer": - """ - Deserialize the component from a dictionary. - - :param data: Dictionary to deserialize from. - :returns: Deserialized component. - """ - init_params = data["init_parameters"] - deserialize_secrets_inplace(init_params, keys=["github_token"]) - return default_from_dict(cls, data) - - @component.output_types(documents=List[Document]) - def run(self, url: str) -> dict: - """ - Process a GitHub issue URL and return documents. 
- - :param url: GitHub issue URL - :return: Dictionary containing list of documents - """ - try: - owner, repo, issue_number = self._parse_github_url(url) - - # Fetch issue data - issue_data = self._fetch_issue(owner, repo, issue_number) - documents = [self._create_issue_document(issue_data)] - - # Fetch and process comments if they exist - if issue_data["comments"] > 0: - comments = self._fetch_comments(issue_data["comments_url"]) - documents.extend( - self._create_comment_document(comment, issue_number) - for comment in comments - ) - - return {"documents": documents} - - except Exception as e: - if self.raise_on_failure: - raise - - error_message = f"Error processing GitHub issue {url}: {str(e)}" - logger.warning(error_message) - error_doc = Document( - content=error_message, - meta={ - "error": True, - "type": "error", - "url": url, - } - ) - return {"documents": [error_doc]} - diff --git a/integrations/github-haystack/src/github_haystack/github_components/pr_creator.py b/integrations/github-haystack/src/github_haystack/github_components/pr_creator.py deleted file mode 100644 index d27d8cb064..0000000000 --- a/integrations/github-haystack/src/github_haystack/github_components/pr_creator.py +++ /dev/null @@ -1,171 +0,0 @@ -import re -from typing import Any, Dict - -import requests -from haystack import component, default_from_dict, default_to_dict, logging -from haystack.utils import Secret, deserialize_secrets_inplace - -logger = logging.getLogger(__name__) - - -@component -class GithubPRCreator: - """ - A Haystack component for creating pull requests from a fork back to the original repository. - - Uses the authenticated user's fork to create the PR and links it to an existing issue. 
- - ### Usage example - ```python - from haystack.components.actions import GithubPRCreator - from haystack.utils import Secret - - pr_creator = GithubPRCreator( - github_token=Secret.from_env_var("GITHUB_TOKEN") # Token from the fork owner - ) - - # Create a PR from your fork - result = pr_creator.run( - issue_url="https://github.com/owner/repo/issues/123", - title="Fix issue #123", - body="This PR addresses issue #123", - branch="feature-branch", # The branch in your fork with the changes - base="main" # The branch in the original repo to merge into - ) - ``` - """ - - def __init__( - self, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - raise_on_failure: bool = True - ): - """ - Initialize the component. - - :param github_token: GitHub personal access token for authentication (from the fork owner) - :param raise_on_failure: If True, raises exceptions on API errors - """ - if not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") - - self.github_token = github_token - self.raise_on_failure = raise_on_failure - - def _get_headers(self) -> Dict[str, str]: - """ - Get headers for GitHub API requests with resolved token. - - :return: Dictionary of request headers - """ - return { - "Accept": "application/vnd.github.v3+json", - "Authorization": f"Bearer {self.github_token.resolve_value()}", - "User-Agent": "Haystack/GithubPRCreator" - } - - def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: - """ - Parse owner, repo name, and issue number from GitHub issue URL. 
- - :param issue_url: Full GitHub issue URL - :return: Tuple of (owner, repo_name, issue_number) - :raises ValueError: If URL format is invalid - """ - pattern = r"https://github\.com/([^/]+)/([^/]+)/issues/(\d+)" - match = re.match(pattern, issue_url) - if not match: - raise ValueError("Invalid GitHub issue URL format") - return match.group(1), match.group(2), match.group(3) - - def _get_authenticated_user(self) -> str: - """Get the username of the authenticated user (fork owner).""" - response = requests.get( - "https://api.github.com/user", - headers=self._get_headers() - ) - response.raise_for_status() - return response.json()["login"] - - def _check_fork_exists(self, owner: str, repo: str, fork_owner: str) -> bool: - """Check if the fork exists.""" - url = f"https://api.github.com/repos/{fork_owner}/{repo}" - try: - response = requests.get(url, headers=self._get_headers()) - response.raise_for_status() - fork_data = response.json() - return fork_data.get("fork", False) - except requests.RequestException: - return False - - @component.output_types(result=str) - def run( - self, - issue_url: str, - title: str, - branch: str, - base: str, - body: str = "", - draft: bool = False - ) -> Dict[str, str]: - """ - Create a new pull request from your fork to the original repository, linked to the specified issue. 
- - :param issue_url: URL of the GitHub issue to link the PR to - :param title: Title of the pull request - :param branch: Name of the branch in your fork where changes are implemented - :param base: Name of the branch in the original repo you want to merge into - :param body: Additional content for the pull request description - :param draft: Whether to create a draft pull request - :return: Dictionary containing operation result - """ - try: - # Parse repository information from issue URL - owner, repo_name, issue_number = self._parse_issue_url(issue_url) - - # Get the authenticated user (fork owner) - fork_owner = self._get_authenticated_user() - - # Check if the fork exists - if not self._check_fork_exists(owner, repo_name, fork_owner): - return {"result": f"Error: Fork not found at {fork_owner}/{repo_name}"} - - url = f"https://api.github.com/repos/{owner}/{repo_name}/pulls" - - # For cross-repository PRs, head must be in the format username:branch - head = f"{fork_owner}:{branch}" - - pr_data = { - "title": title, - "body": body, - "head": head, - "base": base, - "draft": draft, - "maintainer_can_modify": True, # Allow maintainers to modify the PR - } - - response = requests.post(url, headers=self._get_headers(), json=pr_data) - response.raise_for_status() - pr_number = response.json()["number"] - - return {"result": f"Pull request #{pr_number} created successfully and linked to issue #{issue_number}"} - - except (requests.RequestException, ValueError) as e: - if self.raise_on_failure: - raise - return {"result": f"Error: {str(e)}"} - - def to_dict(self) -> Dict[str, Any]: - """Serialize the component to a dictionary.""" - return default_to_dict( - self, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure - ) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubPRCreator": - """Deserialize the component from a dictionary.""" - init_params = data["init_parameters"] - 
deserialize_secrets_inplace(init_params, keys=["github_token"]) - return default_from_dict(cls, data) diff --git a/integrations/github-haystack/src/github_haystack/github_components/repo_viewer.py b/integrations/github-haystack/src/github_haystack/github_components/repo_viewer.py deleted file mode 100644 index c2b9b2a018..0000000000 --- a/integrations/github-haystack/src/github_haystack/github_components/repo_viewer.py +++ /dev/null @@ -1,263 +0,0 @@ -import base64 -from dataclasses import dataclass -from typing import Any, Dict, List, Optional - -import requests -from haystack import Document, component, logging -from haystack.utils import Secret - -logger = logging.getLogger(__name__) - - -@dataclass -class GitHubItem: - """Represents an item (file or directory) in a GitHub repository""" - - name: str - type: str # "file" or "dir" - path: str - size: int - url: str - content: Optional[str] = None - - -@component -class GithubRepositoryViewer: - """ - Navigates and fetches content from GitHub repositories. 
- - For directories: - - Returns a list of Documents, one for each item - - Each Document's content is the item name - - Full path and metadata in Document.meta - - For files: - - Returns a single Document - - Document's content is the file content - - Full path and metadata in Document.meta - - For errors: - - Returns a single Document - - Document's content is the error message - - Document's meta contains type="error" - - ### Usage example - ```python - from haystack.components.fetchers import GithubRepositoryViewer - from haystack.utils import Secret - - # Using token directly - viewer = GithubRepositoryViewer(github_token=Secret.from_token("your_token")) - - # Using environment variable - viewer = GithubRepositoryViewer(github_token=Secret.from_env_var("GITHUB_TOKEN")) - - # List directory contents - returns multiple documents - result = viewer.run( - repo="owner/repository", - path="docs/", - ref="main" - ) - - # Get specific file - returns single document - result = viewer.run( - repo="owner/repository", - path="README.md", - ref="main" - ) - ``` - """ - - def __init__( - self, - github_token: Optional[Secret] = None, - raise_on_failure: bool = True, - max_file_size: int = 1_000_000, # 1MB default limit - repo: Optional[str] = None, - branch: Optional[str] = None - ): - """ - Initialize the component. 
- - :param github_token: GitHub personal access token for API authentication - :param raise_on_failure: If True, raises exceptions on API errors - :param max_file_size: Maximum file size in bytes to fetch (default: 1MB) - """ - if github_token is not None and not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") - - self.github_token = github_token - self.raise_on_failure = raise_on_failure - self.max_file_size = max_file_size - self.repo = repo - self.branch = branch - - self.headers = { - "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubRepositoryViewer", - } - - def to_dict(self) -> Dict[str, Any]: - """ - Serialize the component to a dictionary. - - :returns: Dictionary with serialized data. - """ - return { - "github_token": self.github_token.to_dict() if self.github_token else None, - "raise_on_failure": self.raise_on_failure, - "max_file_size": self.max_file_size, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubRepositoryViewer": - """ - Deserialize the component from a dictionary. - - :param data: Dictionary to deserialize from. - :returns: Deserialized component. - """ - init_params = data.copy() - if init_params["github_token"]: - init_params["github_token"] = Secret.from_dict(init_params["github_token"]) - return cls(**init_params) - - def _parse_repo(self, repo: str) -> tuple[str, str]: - """Parse owner/repo string""" - parts = repo.split("/") - if len(parts) != 2: - raise ValueError( - f"Invalid repository format. 
Expected 'owner/repo', got '{repo}'" - ) - return parts[0], parts[1] - - def _normalize_path(self, path: str) -> str: - """Normalize repository path""" - return path.strip("/") - - def _fetch_contents(self, owner: str, repo: str, path: str, ref: str) -> Any: - """Fetch repository contents from GitHub API""" - url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" - if ref: - url += f"?ref={ref}" - - headers = self.headers.copy() - if self.github_token: - headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" - - response = requests.get(url, headers=headers) - response.raise_for_status() - return response.json() - - def _process_file_content(self, content: str, encoding: str) -> str: - """Process file content based on encoding""" - if encoding == "base64": - return base64.b64decode(content).decode("utf-8") - return content - - def _create_file_document(self, item: GitHubItem) -> Document: - """Create a Document from a file""" - return Document( - content=item.content if item.content else item.name, - meta={ - "path": item.path, - "type": "file_content", - "size": item.size, - "url": item.url, - }, - ) - - def _create_directory_documents(self, items: List[GitHubItem]) -> List[Document]: - """Create a list of Documents from directory contents""" - return [ - Document( - content=item.name, - meta={ - "path": item.path, - "type": item.type, - "size": item.size, - "url": item.url, - }, - ) - for item in sorted(items, key=lambda x: (x.type != "dir", x.name.lower())) - ] - - def _create_error_document(self, error: Exception, path: str) -> Document: - """Create a Document from an error""" - return Document( - content=str(error), - meta={ - "type": "error", - "path": path, - }, - ) - - @component.output_types(documents=List[Document]) - def run( - self, path: str, repo: Optional[str] = None, branch: Optional[str] = None - ) -> Dict[str, List[Document]]: - """ - Process a GitHub repository path and return documents. 
- - :param repo: Repository in format "owner/repo" - :param path: Path within repository (default: root) - :param ref: Git reference (branch, tag, commit) to use - :return: Dictionary containing list of documents - """ - if repo is None: - repo = self.repo - if branch is None: - branch = self.branch - - try: - owner, repo_name = self._parse_repo(repo) - normalized_path = self._normalize_path(path) - - contents = self._fetch_contents(owner, repo_name, normalized_path, branch) - - # Handle single file response - if not isinstance(contents, list): - if contents.get("size", 0) > self.max_file_size: - raise ValueError( - f"File size {contents['size']} exceeds limit of {self.max_file_size}" - ) - - item = GitHubItem( - name=contents["name"], - type="file", - path=contents["path"], - size=contents["size"], - url=contents["html_url"], - content=self._process_file_content( - contents["content"], contents["encoding"] - ), - ) - return {"documents": [self._create_file_document(item)]} - - # Handle directory listing - items = [ - GitHubItem( - name=item["name"], - type="dir" if item["type"] == "dir" else "file", - path=item["path"], - size=item.get("size", 0), - url=item["html_url"], - ) - for item in contents - ] - - return {"documents": self._create_directory_documents(items)} - - except Exception as e: - error_doc = self._create_error_document( - f"Error processing repository path {path}: {str(e)}. 
Seems like the file does not exist.", path - ) - if self.raise_on_failure: - raise - logger.warning( - "Error processing repository path {path}: {error}", - path=path, - error=str(e), - ) - return {"documents": [error_doc]} - diff --git a/integrations/github-haystack/src/github_haystack/github_components/repository_forker.py b/integrations/github-haystack/src/github_haystack/github_components/repository_forker.py deleted file mode 100644 index 9dabf8bba5..0000000000 --- a/integrations/github-haystack/src/github_haystack/github_components/repository_forker.py +++ /dev/null @@ -1,298 +0,0 @@ -import re -from typing import Any, Dict, Optional - -import requests -from haystack import component, default_from_dict, default_to_dict, logging -from haystack.utils import Secret, deserialize_secrets_inplace - -logger = logging.getLogger(__name__) - - -@component -class GithubRepoForker: - """ - Forks a GitHub repository from an issue URL. - - The component takes a GitHub issue URL, extracts the repository information, - creates or syncs a fork of that repository, and optionally creates an issue-specific branch. - - ### Usage example - ```python - from haystack.components.actions import GithubRepoForker - from haystack.utils import Secret - - # Using direct token with auto-sync and branch creation - forker = GithubRepoForker( - github_token=Secret.from_token("your_token"), - auto_sync=True, - create_branch=True - ) - - result = forker.run(url="https://github.com/owner/repo/issues/123") - # Will create or sync fork and create branch "fix-123" - ``` - """ - - def __init__( - self, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - raise_on_failure: bool = True, - wait_for_completion: bool = False, - max_wait_seconds: int = 300, - poll_interval: int = 2, - auto_sync: bool = True, - create_branch: bool = True, - ): - """ - Initialize the component. 
- - :param github_token: GitHub personal access token for API authentication - :param raise_on_failure: If True, raises exceptions on API errors - :param wait_for_completion: If True, waits until fork is fully created - :param max_wait_seconds: Maximum time to wait for fork completion in seconds - :param poll_interval: Time between status checks in seconds - :param auto_sync: If True, syncs fork with original repository if it already exists - :param create_branch: If True, creates a fix branch based on the issue number - """ - if not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") - - self.github_token = github_token - self.raise_on_failure = raise_on_failure - self.wait_for_completion = wait_for_completion - self.max_wait_seconds = max_wait_seconds - self.poll_interval = poll_interval - self.auto_sync = auto_sync - self.create_branch = create_branch - - self.headers = { - "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubRepoForker" - } - - def _parse_github_url(self, url: str) -> tuple[str, str, str]: - """ - Parse GitHub URL into owner, repo, and issue number. - - :param url: GitHub issue URL - :return: Tuple of (owner, repo, issue_number) - :raises ValueError: If URL format is invalid - """ - pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" - match = re.match(pattern, url) - if not match: - raise ValueError(f"Invalid GitHub issue URL format: {url}") - - owner, repo, issue_number = match.groups() - return owner, repo, issue_number - - def _check_fork_status(self, fork_path: str) -> bool: - """ - Check if a forked repository exists and is ready. 
- - :param fork_path: Repository path in owner/repo format - :return: True if fork exists and is ready, False otherwise - """ - url = f"https://api.github.com/repos/{fork_path}" - try: - response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} - ) - return response.status_code == 200 - except requests.RequestException: - return False - - def _get_authenticated_user(self) -> str: - """ - Get the authenticated user's username. - - :return: Username of the authenticated user - :raises requests.RequestException: If API call fails - """ - url = "https://api.github.com/user" - response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} - ) - response.raise_for_status() - return response.json()["login"] - - def _get_existing_repository(self, repo_name: str) -> Optional[str]: - """ - Check if a repository with the given name already exists in the authenticated user's account. - - :param repo_name: Repository name to check - :return: Full repository name if it exists, None otherwise - """ - url = f"https://api.github.com/repos/{self._get_authenticated_user()}/{repo_name}" - try: - response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} - ) - if response.status_code == 200: - return repo_name - return None - except requests.RequestException as e: - logger.warning(f"Failed to check repository existence: {str(e)}") - return None - - def _sync_fork(self, fork_path: str) -> None: - """ - Sync a fork with its upstream repository. 
- - :param fork_path: Fork path in owner/repo format - :raises requests.RequestException: If sync fails - """ - url = f"https://api.github.com/repos/{fork_path}/merge-upstream" - response = requests.post( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - json={"branch": "main"} - ) - response.raise_for_status() - - def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: - """ - Create a new branch for the issue. - - :param fork_path: Fork path in owner/repo format - :param issue_number: Issue number to use in branch name - :raises requests.RequestException: If branch creation fails - """ - # First, get the default branch SHA - url = f"https://api.github.com/repos/{fork_path}" - response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} - ) - response.raise_for_status() - default_branch = response.json()["default_branch"] - - # Get the SHA of the default branch - url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" - response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} - ) - response.raise_for_status() - sha = response.json()["object"]["sha"] - - # Create the new branch - branch_name = f"fix-{issue_number}" - url = f"https://api.github.com/repos/{fork_path}/git/refs" - response = requests.post( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - json={ - "ref": f"refs/heads/{branch_name}", - "sha": sha - } - ) - response.raise_for_status() - - def _create_fork(self, owner: str, repo: str) -> str: - """ - Create a fork of the repository. 
- - :param owner: Original repository owner - :param repo: Repository name - :return: Fork path in owner/repo format - :raises requests.RequestException: If fork creation fails - """ - url = f"https://api.github.com/repos/{owner}/{repo}/forks" - response = requests.post( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} - ) - response.raise_for_status() - - fork_data = response.json() - return f"{fork_data['owner']['login']}/{fork_data['name']}" - - def to_dict(self) -> Dict[str, Any]: - """ - Serialize the component to a dictionary. - - :returns: Dictionary with serialized data. - """ - return default_to_dict( - self, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure, - wait_for_completion=self.wait_for_completion, - max_wait_seconds=self.max_wait_seconds, - poll_interval=self.poll_interval, - auto_sync=self.auto_sync, - create_branch=self.create_branch, - ) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubRepoForker": - """ - Deserialize the component from a dictionary. - - :param data: Dictionary to deserialize from. - :returns: Deserialized component. - """ - init_params = data["init_parameters"] - deserialize_secrets_inplace(init_params, keys=["github_token"]) - return default_from_dict(cls, data) - - @component.output_types(repo=str, issue_branch=str) - def run(self, url: str) -> dict: - """ - Process a GitHub issue URL and create or sync a fork of the repository. 
- - :param url: GitHub issue URL - :return: Dictionary containing repository path in owner/repo format - """ - try: - # Extract repository information - owner, repo, issue_number = self._parse_github_url(url) - - # Check if fork already exists - user = self._get_authenticated_user() - existing_fork = self._get_existing_repository(repo) - - if existing_fork and self.auto_sync: - # If fork exists and auto_sync is enabled, sync with upstream - fork_path = f"{user}/{repo}" - logger.info("Fork already exists, syncing with upstream repository") - self._sync_fork(fork_path) - else: - # Create new fork - fork_path = self._create_fork(owner, repo) - - # Wait for fork completion if requested - if self.wait_for_completion: - import time - start_time = time.time() - - while time.time() - start_time < self.max_wait_seconds: - if self._check_fork_status(fork_path): - logger.info("Fork creation completed successfully") - break - logger.debug("Waiting for fork creation to complete...") - time.sleep(self.poll_interval) - else: - msg = f"Fork creation timed out after {self.max_wait_seconds} seconds" - if self.raise_on_failure: - raise TimeoutError(msg) - logger.warning(msg) - - # Create issue branch if enabled - issue_branch = None - if self.create_branch: - issue_branch = f"fix-{issue_number}" - logger.info(f"Creating branch for issue #{issue_number}") - self._create_issue_branch(fork_path, issue_number) - - return {"repo": fork_path, "issue_branch": issue_branch} - - except Exception as e: - if self.raise_on_failure: - raise - logger.warning("Error forking repository from {url}: {error}", url=url, error=str(e)) - return {"repo": "", "issue_branch": None} diff --git a/integrations/github-haystack/tests/__init__.py b/integrations/github-haystack/tests/__init__.py deleted file mode 100644 index d391382c6b..0000000000 --- a/integrations/github-haystack/tests/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2025-present deepset GmbH -# -# SPDX-License-Identifier: 
Apache-2.0 From af37ecd7afe7a6b729f9085946e3aea96b157da2 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 15 Apr 2025 08:41:53 +0200 Subject: [PATCH 04/51] renamed integration, added components dir --- README.md | 1 + .../{github_haystack => github}/LICENSE.txt | 0 .../{github_haystack => github}/README.md | 0 .../pyproject.toml | 22 +++++++++---------- .../components}/__about__.py | 0 .../components}/__init__.py | 0 .../components/connectors}/file_editor.py | 0 .../components/connectors}/issue_commenter.py | 0 .../components/connectors}/issue_viewer.py | 0 .../components/connectors}/pr_creator.py | 0 .../components/connectors}/repo_viewer.py | 0 .../connectors}/repository_forker.py | 0 .../components/prompts}/__init__.py | 0 .../components/prompts}/comment_tool.py | 0 .../components/prompts}/context.py | 0 .../components/prompts}/file_editor_tool.py | 0 .../components/prompts}/pr_system_prompt.py | 0 .../components/prompts}/repo_viewer_tool.py | 0 .../components/prompts}/system_prompt.py | 0 .../tests/__init__.py | 0 20 files changed, 12 insertions(+), 11 deletions(-) rename integrations/{github_haystack => github}/LICENSE.txt (100%) rename integrations/{github_haystack => github}/README.md (100%) rename integrations/{github_haystack => github}/pyproject.toml (68%) rename integrations/{github_haystack/src/github_haystack => github/src/haystack_integrations/components}/__about__.py (100%) rename integrations/{github_haystack/src/github_haystack => github/src/haystack_integrations/components}/__init__.py (100%) rename integrations/{github_haystack/src/github_haystack/github_components => github/src/haystack_integrations/components/connectors}/file_editor.py (100%) rename integrations/{github_haystack/src/github_haystack/github_components => github/src/haystack_integrations/components/connectors}/issue_commenter.py (100%) rename integrations/{github_haystack/src/github_haystack/github_components => 
github/src/haystack_integrations/components/connectors}/issue_viewer.py (100%) rename integrations/{github_haystack/src/github_haystack/github_components => github/src/haystack_integrations/components/connectors}/pr_creator.py (100%) rename integrations/{github_haystack/src/github_haystack/github_components => github/src/haystack_integrations/components/connectors}/repo_viewer.py (100%) rename integrations/{github_haystack/src/github_haystack/github_components => github/src/haystack_integrations/components/connectors}/repository_forker.py (100%) rename integrations/{github_haystack/src/github_haystack/agent_prompts => github/src/haystack_integrations/components/prompts}/__init__.py (100%) rename integrations/{github_haystack/src/github_haystack/agent_prompts => github/src/haystack_integrations/components/prompts}/comment_tool.py (100%) rename integrations/{github_haystack/src/github_haystack/agent_prompts => github/src/haystack_integrations/components/prompts}/context.py (100%) rename integrations/{github_haystack/src/github_haystack/agent_prompts => github/src/haystack_integrations/components/prompts}/file_editor_tool.py (100%) rename integrations/{github_haystack/src/github_haystack/agent_prompts => github/src/haystack_integrations/components/prompts}/pr_system_prompt.py (100%) rename integrations/{github_haystack/src/github_haystack/agent_prompts => github/src/haystack_integrations/components/prompts}/repo_viewer_tool.py (100%) rename integrations/{github_haystack/src/github_haystack/agent_prompts => github/src/haystack_integrations/components/prompts}/system_prompt.py (100%) rename integrations/{github_haystack => github}/tests/__init__.py (100%) diff --git a/README.md b/README.md index e930803a7c..4a774885c2 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [deepeval-haystack](integrations/deepeval/) | Evaluator | [![PyPI - 
Version](https://img.shields.io/pypi/v/deepeval-haystack.svg)](https://pypi.org/project/deepeval-haystack) | [![Test / deepeval](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml) | | [elasticsearch-haystack](integrations/elasticsearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack) | [![Test / elasticsearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml) | | [fastembed-haystack](integrations/fastembed/) | Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/fastembed-haystack.svg)](https://pypi.org/project/fastembed-haystack/) | [![Test / fastembed](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml) | +| [github-haystack](integrations/github/) | Connector | [![PyPI - Version](https://img.shields.io/pypi/v/github-haystack.svg)](https://pypi.org/project/github-haystack) | [![Test / github](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/github.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/github.yml) | | [google-ai-haystack](integrations/google_ai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-ai-haystack.svg)](https://pypi.org/project/google-ai-haystack) | [![Test / google-ai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml) | | [google-vertex-haystack](integrations/google_vertex/) | Generator | 
[![PyPI - Version](https://img.shields.io/pypi/v/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack) | [![Test / google-vertex](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml) | | [instructor-embedders-haystack](integrations/instructor_embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [![Test / instructor-embedders](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml) | diff --git a/integrations/github_haystack/LICENSE.txt b/integrations/github/LICENSE.txt similarity index 100% rename from integrations/github_haystack/LICENSE.txt rename to integrations/github/LICENSE.txt diff --git a/integrations/github_haystack/README.md b/integrations/github/README.md similarity index 100% rename from integrations/github_haystack/README.md rename to integrations/github/README.md diff --git a/integrations/github_haystack/pyproject.toml b/integrations/github/pyproject.toml similarity index 68% rename from integrations/github_haystack/pyproject.toml rename to integrations/github/pyproject.toml index 539aa0c09a..d121dbad3f 100644 --- a/integrations/github_haystack/pyproject.toml +++ b/integrations/github/pyproject.toml @@ -7,7 +7,7 @@ name = "github-haystack" dynamic = ["version"] description = 'Haystack components for interacting with GitHub repositories' readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" license = "Apache-2.0" keywords = [] authors = [ @@ -16,42 +16,42 @@ authors = [ classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", - "Programming Language :: Python :: 3.8", 
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [] [project.urls] -Documentation = "https://github.com/deepset GmbH/github-haystack#readme" -Issues = "https://github.com/deepset GmbH/github-haystack/issues" -Source = "https://github.com/deepset GmbH/github-haystack" +Source = "https://github.com/deepset-ai/haystack-core-integrations/github" +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/github/README.md" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" [tool.hatch.version] -path = "src/github_haystack/__about__.py" +path = "src/github/__about__.py" [tool.hatch.envs.types] extra-dependencies = [ "mypy>=1.0.0", ] [tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive {args:src/github_haystack tests}" +check = "mypy --install-types --non-interactive {args:src/github tests}" [tool.coverage.run] -source_pkgs = ["github_haystack", "tests"] +source_pkgs = ["github", "tests"] branch = true parallel = true omit = [ - "src/github_haystack/__about__.py", + "src/github/__about__.py", ] [tool.coverage.paths] -github_haystack = ["src/github_haystack", "*/github-haystack/src/github_haystack"] -tests = ["tests", "*/github-haystack/tests"] +github = ["src/github", "*/github/src/github"] +tests = ["tests", "*/github/tests"] [tool.coverage.report] exclude_lines = [ diff --git a/integrations/github_haystack/src/github_haystack/__about__.py b/integrations/github/src/haystack_integrations/components/__about__.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/__about__.py rename to integrations/github/src/haystack_integrations/components/__about__.py 
diff --git a/integrations/github_haystack/src/github_haystack/__init__.py b/integrations/github/src/haystack_integrations/components/__init__.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/__init__.py rename to integrations/github/src/haystack_integrations/components/__init__.py diff --git a/integrations/github_haystack/src/github_haystack/github_components/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/github_components/file_editor.py rename to integrations/github/src/haystack_integrations/components/connectors/file_editor.py diff --git a/integrations/github_haystack/src/github_haystack/github_components/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/github_components/issue_commenter.py rename to integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py diff --git a/integrations/github_haystack/src/github_haystack/github_components/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/github_components/issue_viewer.py rename to integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py diff --git a/integrations/github_haystack/src/github_haystack/github_components/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/github_components/pr_creator.py rename to integrations/github/src/haystack_integrations/components/connectors/pr_creator.py diff --git a/integrations/github_haystack/src/github_haystack/github_components/repo_viewer.py 
b/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/github_components/repo_viewer.py rename to integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py diff --git a/integrations/github_haystack/src/github_haystack/github_components/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/github_components/repository_forker.py rename to integrations/github/src/haystack_integrations/components/connectors/repository_forker.py diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/__init__.py b/integrations/github/src/haystack_integrations/components/prompts/__init__.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/agent_prompts/__init__.py rename to integrations/github/src/haystack_integrations/components/prompts/__init__.py diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/comment_tool.py b/integrations/github/src/haystack_integrations/components/prompts/comment_tool.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/agent_prompts/comment_tool.py rename to integrations/github/src/haystack_integrations/components/prompts/comment_tool.py diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/context.py b/integrations/github/src/haystack_integrations/components/prompts/context.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/agent_prompts/context.py rename to integrations/github/src/haystack_integrations/components/prompts/context.py diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/file_editor_tool.py b/integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py 
similarity index 100% rename from integrations/github_haystack/src/github_haystack/agent_prompts/file_editor_tool.py rename to integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/pr_system_prompt.py b/integrations/github/src/haystack_integrations/components/prompts/pr_system_prompt.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/agent_prompts/pr_system_prompt.py rename to integrations/github/src/haystack_integrations/components/prompts/pr_system_prompt.py diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/agent_prompts/repo_viewer_tool.py rename to integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py diff --git a/integrations/github_haystack/src/github_haystack/agent_prompts/system_prompt.py b/integrations/github/src/haystack_integrations/components/prompts/system_prompt.py similarity index 100% rename from integrations/github_haystack/src/github_haystack/agent_prompts/system_prompt.py rename to integrations/github/src/haystack_integrations/components/prompts/system_prompt.py diff --git a/integrations/github_haystack/tests/__init__.py b/integrations/github/tests/__init__.py similarity index 100% rename from integrations/github_haystack/tests/__init__.py rename to integrations/github/tests/__init__.py From ac4bf31946fb58fa2b6326d28315042780355ed6 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 09:20:10 +0200 Subject: [PATCH 05/51] add tests, pydoc, update pyproject.toml --- integrations/github/pydoc/config.yml | 34 +++ integrations/github/pyproject.toml | 158 ++++++++-- .../components/__about__.py | 4 - .../components/__init__.py | 3 - 
integrations/github/tests/test_file_editor.py | 272 ++++++++++++++++++ .../github/tests/test_issue_commenter.py | 126 ++++++++ .../github/tests/test_issue_viewer.py | 155 ++++++++++ integrations/github/tests/test_pr_creator.py | 115 ++++++++ integrations/github/tests/test_repo_viewer.py | 187 ++++++++++++ .../github/tests/test_repository_forker.py | 197 +++++++++++++ 10 files changed, 1221 insertions(+), 30 deletions(-) create mode 100644 integrations/github/pydoc/config.yml delete mode 100644 integrations/github/src/haystack_integrations/components/__about__.py delete mode 100644 integrations/github/src/haystack_integrations/components/__init__.py create mode 100644 integrations/github/tests/test_file_editor.py create mode 100644 integrations/github/tests/test_issue_commenter.py create mode 100644 integrations/github/tests/test_issue_viewer.py create mode 100644 integrations/github/tests/test_pr_creator.py create mode 100644 integrations/github/tests/test_repo_viewer.py create mode 100644 integrations/github/tests/test_repository_forker.py diff --git a/integrations/github/pydoc/config.yml b/integrations/github/pydoc/config.yml new file mode 100644 index 0000000000..83694982ea --- /dev/null +++ b/integrations/github/pydoc/config.yml @@ -0,0 +1,34 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.connectors.file_editor", + "haystack_integrations.components.connectors.issue_commenter", + "haystack_integrations.components.connectors.issue_viewer", + "haystack_integrations.components.connectors.pr_creator", + "haystack_integrations.components.connectors.repo_viewer", + "haystack_integrations.components.connectors.repository_forker", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: 
haystack_pydoc_tools.renderers.ReadmeIntegrationRenderer + excerpt: GitHub integration for Haystack + category_slug: integrations-api + title: GitHub + slug: integrations-github + order: 100 + markdown: + descriptive_class_title: false + classdef_code_block: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_github.md \ No newline at end of file diff --git a/integrations/github/pyproject.toml b/integrations/github/pyproject.toml index d121dbad3f..6a29ffa2ab 100644 --- a/integrations/github/pyproject.toml +++ b/integrations/github/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["hatchling"] +requires = ["hatchling", "hatch-vcs"] build-backend = "hatchling.build" [project] @@ -10,10 +10,9 @@ readme = "README.md" requires-python = ">=3.9" license = "Apache-2.0" keywords = [] -authors = [ - { name = "deepset GmbH", email = "info@deepset.ai" }, -] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] classifiers = [ + "License :: OSI Approved :: Apache Software License", "Development Status :: 4 - Beta", "Programming Language :: Python", "Programming Language :: Python :: 3.9", @@ -24,38 +23,151 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = [] +dependencies = ["haystack-ai"] [project.urls] Source = "https://github.com/deepset-ai/haystack-core-integrations/github" Documentation = "https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/github/README.md" Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + [tool.hatch.version] -path = "src/github/__about__.py" +source = "vcs" +tag-pattern = 'integrations\/github-v(?P<version>.*)' + +[tool.hatch.version.raw-options] +root = "../.." 
+git_describe_command = 'git describe --tags --match="integrations/github-v[0-9]*"' -[tool.hatch.envs.types] -extra-dependencies = [ - "mypy>=1.0.0", +[tool.hatch.envs.default] +installer = "uv" +dependencies = [ + "coverage[toml]>=6.5", + "pytest", + "pytest-rerunfailures", + "haystack-pydoc-tools", + "pytz", + ] + +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +test-cov-retry = "test-cov --reruns 3 --reruns-delay 30 -x" +cov-report = ["- coverage combine", "coverage report"] +cov = ["test-cov", "cov-report"] +cov-retry = ["test-cov-retry", "cov-report"] +docs = ["pydoc-markdown pydoc/config.yml"] + +[tool.hatch.envs.lint] +installer = "uv" +detached = true +dependencies = ["pip", "black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"] +[tool.hatch.envs.lint.scripts] +typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" +style = [ + "ruff check {args:}", + "black --check --diff {args:.}", +] +fmt = ["black {args:.}", "ruff check --fix {args:}", "style"] +all = ["style", "typing"] + +[tool.black] +target-version = ["py38"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +target-version = "py38" +line-length = 120 + +[tool.ruff.lint] +select = [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", + # Misc + "B008", + "S101", ] -[tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive {args:src/github tests}" +unfixable = [ + # Don't touch unused imports + "F401", +] + +[tool.ruff.lint.isort] +known-first-party = ["haystack_integrations"] + 
+[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] [tool.coverage.run] -source_pkgs = ["github", "tests"] +source = ["haystack_integrations"] branch = true -parallel = true -omit = [ - "src/github/__about__.py", -] +parallel = false -[tool.coverage.paths] -github = ["src/github", "*/github/src/github"] -tests = ["tests", "*/github/tests"] [tool.coverage.report] -exclude_lines = [ - "no cov", - "if __name__ == .__main__.:", - "if TYPE_CHECKING:", +omit = ["*/tests/*", "*/__init__.py"] +show_missing = true +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] + + +[[tool.mypy.overrides]] +module = [ + "github.*", + "haystack.*", + "haystack_integrations.*", + "openai.*", + "pytest.*", + "numpy.*", +] +ignore_missing_imports = true + +[tool.pytest.ini_options] +addopts = "--strict-markers" +markers = [ + "integration: integration tests", + "unit: unit tests", ] +log_cli = true diff --git a/integrations/github/src/haystack_integrations/components/__about__.py b/integrations/github/src/haystack_integrations/components/__about__.py deleted file mode 100644 index 348b27fe81..0000000000 --- a/integrations/github/src/haystack_integrations/components/__about__.py +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-FileCopyrightText: 2025-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -__version__ = "0.0.1" diff --git a/integrations/github/src/haystack_integrations/components/__init__.py b/integrations/github/src/haystack_integrations/components/__init__.py deleted file mode 100644 index d391382c6b..0000000000 --- a/integrations/github/src/haystack_integrations/components/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2025-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/github/tests/test_file_editor.py 
b/integrations/github/tests/test_file_editor.py new file mode 100644 index 0000000000..b6e4aa7d6a --- /dev/null +++ b/integrations/github/tests/test_file_editor.py @@ -0,0 +1,272 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.file_editor import GithubFileEditor, Command + + +class TestGithubFileEditor: + def test_init_default(self): + editor = GithubFileEditor() + assert editor.github_token is None + assert editor.default_repo is None + assert editor.default_branch == "main" + assert editor.raise_on_failure is True + + def test_init_with_parameters(self): + token = Secret.from_token("test_token") + editor = GithubFileEditor( + github_token=token, + repo="owner/repo", + branch="feature", + raise_on_failure=False + ) + assert editor.github_token == token + assert editor.default_repo == "owner/repo" + assert editor.default_branch == "feature" + assert editor.raise_on_failure is False + + # Test with invalid token type + with pytest.raises(TypeError): + GithubFileEditor(github_token="not_a_secret") + + + def test_to_dict(self): + token = Secret.from_token("test_token") + editor = GithubFileEditor( + github_token=token, + repo="owner/repo", + branch="feature", + raise_on_failure=False + ) + + result = editor.to_dict() + + assert result["github_token"]["type"] == "haystack.utils.Secret" + assert result["repo"] == "owner/repo" + assert result["branch"] == "feature" + assert result["raise_on_failure"] is False + + + def test_from_dict(self): + data = { + "github_token": { + "type": "haystack.utils.Secret", + "token": "test_token" + }, + "repo": "owner/repo", + "branch": "feature", + "raise_on_failure": False + } + + editor = GithubFileEditor.from_dict(data) + + assert isinstance(editor.github_token, Secret) + assert editor.github_token.resolve_value() == "test_token" + assert 
editor.default_repo == "owner/repo" + assert editor.default_branch == "feature" + assert editor.raise_on_failure is False + + + @patch("requests.get") + @patch("requests.put") + def test_run_edit(self, mock_put, mock_get): + # Mock the file content response + mock_get.return_value.json.return_value = { + "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" + "sha": "abc123" + } + mock_get.return_value.raise_for_status.return_value = None + + # Mock the update response + mock_put.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + editor = GithubFileEditor(github_token=token) + + result = editor.run( + command=Command.EDIT, + payload={ + "path": "test.txt", + "original": "Hello", + "replacement": "Hi", + "message": "Update greeting" + }, + repo="owner/repo", + branch="main" + ) + + assert result["result"] == "Edit successful" + + # Verify the API calls + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/test.txt", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, + params={"ref": "main"} + ) + + mock_put.assert_called_once() + put_call = mock_put.call_args + assert put_call[0][0] == "https://api.github.com/repos/owner/repo/contents/test.txt" + assert put_call[1]["json"]["message"] == "Update greeting" + assert put_call[1]["json"]["sha"] == "abc123" + assert put_call[1]["json"]["branch"] == "main" + + + @patch("requests.get") + @patch("requests.patch") + def test_run_undo(self, mock_patch, mock_get): + # Mock the user check response + mock_get.return_value.json.return_value = { + "login": "testuser" + } + mock_get.return_value.raise_for_status.return_value = None + + # Mock the commits response + mock_get.side_effect = [ + type("Response", (), {"json": lambda: [{"author": {"login": "testuser"}}], "raise_for_status": lambda: None}), + type("Response", (), {"json": lambda: [{"sha": "abc123"}, 
{"sha": "def456"}], "raise_for_status": lambda: None}) + ] + + # Mock the update response + mock_patch.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + editor = GithubFileEditor(github_token=token) + + result = editor.run( + command=Command.UNDO, + payload={"message": "Undo last change"}, + repo="owner/repo", + branch="main" + ) + + assert result["result"] == "Successfully undid last change" + + # Verify the API calls + assert mock_get.call_count == 3 + mock_patch.assert_called_once_with( + "https://api.github.com/repos/owner/repo/git/refs/heads/main", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, + json={"sha": "def456", "force": True} + ) + + + @patch("requests.put") + def test_run_create(self, mock_put): + # Mock the create response + mock_put.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + editor = GithubFileEditor(github_token=token) + + result = editor.run( + command=Command.CREATE, + payload={ + "path": "new.txt", + "content": "New file content", + "message": "Create new file" + }, + repo="owner/repo", + branch="main" + ) + + assert result["result"] == "File created successfully" + + # Verify the API call + mock_put.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/new.txt", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, + json={ + "message": "Create new file", + "content": "TmV3IGZpbGUgY29udGVudA==", # Base64 encoded "New file content" + "branch": "main" + } + ) + + + @patch("requests.get") + @patch("requests.delete") + def test_run_delete(self, mock_delete, mock_get): + # Mock the file content response + mock_get.return_value.json.return_value = { + "sha": "abc123" + } + mock_get.return_value.raise_for_status.return_value = None + + # Mock the delete response 
+ mock_delete.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + editor = GithubFileEditor(github_token=token) + + result = editor.run( + command=Command.DELETE, + payload={ + "path": "test.txt", + "message": "Delete file" + }, + repo="owner/repo", + branch="main" + ) + + assert result["result"] == "File deleted successfully" + + # Verify the API calls + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/test.txt", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, + params={"ref": "main"} + ) + + mock_delete.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/test.txt", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, + json={ + "message": "Delete file", + "sha": "abc123", + "branch": "main" + } + ) + + + @patch("requests.get") + def test_run_error_handling(self, mock_get): + # Mock an error response + mock_get.side_effect = Exception("API Error") + + token = Secret.from_token("test_token") + editor = GithubFileEditor(github_token=token, raise_on_failure=False) + + result = editor.run( + command=Command.EDIT, + payload={ + "path": "test.txt", + "original": "Hello", + "replacement": "Hi", + "message": "Update greeting" + }, + repo="owner/repo", + branch="main" + ) + + assert "Error: API Error" in result["result"] + + # Test with raise_on_failure=True + editor = GithubFileEditor(github_token=token, raise_on_failure=True) + with pytest.raises(Exception): + editor.run( + command=Command.EDIT, + payload={ + "path": "test.txt", + "original": "Hello", + "replacement": "Hi", + "message": "Update greeting" + }, + repo="owner/repo", + branch="main" + ) diff --git a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py new file mode 100644 index 
0000000000..0def098eea --- /dev/null +++ b/integrations/github/tests/test_issue_commenter.py @@ -0,0 +1,126 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.issue_commenter import GithubIssueCommenter + +class TestGithubIssueCommenter: + def test_init_default(self): + commenter = GithubIssueCommenter() + assert commenter.github_token is not None + assert commenter.raise_on_failure is True + assert commenter.retry_attempts == 2 + + def test_init_with_parameters(self): + token = Secret.from_token("test_token") + commenter = GithubIssueCommenter( + github_token=token, + raise_on_failure=False, + retry_attempts=3 + ) + assert commenter.github_token == token + assert commenter.raise_on_failure is False + assert commenter.retry_attempts == 3 + + + def test_to_dict(self): + token = Secret.from_token("test_token") + commenter = GithubIssueCommenter( + github_token=token, + raise_on_failure=False, + retry_attempts=3 + ) + + result = commenter.to_dict() + + assert result["type"] == "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter" + assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" + assert result["init_parameters"]["raise_on_failure"] is False + assert result["init_parameters"]["retry_attempts"] == 3 + + + def test_from_dict(self): + data = { + "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", + "init_parameters": { + "github_token": { + "type": "haystack.utils.Secret", + "token": "test_token" + }, + "raise_on_failure": False, + "retry_attempts": 3 + } + } + + commenter = GithubIssueCommenter.from_dict(data) + + assert isinstance(commenter.github_token, Secret) + assert commenter.github_token.resolve_value() == "test_token" + assert commenter.raise_on_failure is 
False + assert commenter.retry_attempts == 3 + + + @patch("requests.post") + def test_run(self, mock_post): + """Test the run method.""" + # Mock the successful response + mock_post.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + commenter = GithubIssueCommenter(github_token=token) + + result = commenter.run( + url="https://github.com/owner/repo/issues/123", + comment="Test comment" + ) + + assert result["success"] is True + + # Verify the API call + mock_post.assert_called_once_with( + "https://api.github.com/repos/owner/repo/issues/123/comments", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubIssueCommenter", "Authorization": "Bearer test_token"}, + json={"body": "Test comment"} + ) + + + @patch("requests.post") + def test_run_error_handling(self, mock_post): + # Mock an error response + mock_post.side_effect = Exception("API Error") + + token = Secret.from_token("test_token") + commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False) + + result = commenter.run( + url="https://github.com/owner/repo/issues/123", + comment="Test comment" + ) + + assert result["success"] is False + + # Test with raise_on_failure=True + commenter = GithubIssueCommenter(github_token=token, raise_on_failure=True) + with pytest.raises(Exception): + commenter.run( + url="https://github.com/owner/repo/issues/123", + comment="Test comment" + ) + + + def test_parse_github_url(self): + token = Secret.from_token("test_token") + commenter = GithubIssueCommenter(github_token=token) + + owner, repo, issue_number = commenter._parse_github_url("https://github.com/owner/repo/issues/123") + assert owner == "owner" + assert repo == "repo" + assert issue_number == 123 + + # Test with invalid URL + with pytest.raises(ValueError): + commenter._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_issue_viewer.py 
b/integrations/github/tests/test_issue_viewer.py new file mode 100644 index 0000000000..9933352cc2 --- /dev/null +++ b/integrations/github/tests/test_issue_viewer.py @@ -0,0 +1,155 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +from haystack import Document +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.issue_viewer import GithubIssueViewer + +class TestGithubIssueViewer: + def test_init_default(self): + viewer = GithubIssueViewer() + assert viewer.github_token is None + assert viewer.raise_on_failure is True + assert viewer.retry_attempts == 2 + + def test_init_with_parameters(self): + token = Secret.from_token("test_token") + viewer = GithubIssueViewer( + github_token=token, + raise_on_failure=False, + retry_attempts=3 + ) + assert viewer.github_token == token + assert viewer.raise_on_failure is False + assert viewer.retry_attempts == 3 + + + def test_to_dict(self): + token = Secret.from_token("test_token") + viewer = GithubIssueViewer( + github_token=token, + raise_on_failure=False, + retry_attempts=3 + ) + + result = viewer.to_dict() + + assert result["type"] == "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer" + assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" + assert result["init_parameters"]["raise_on_failure"] is False + assert result["init_parameters"]["retry_attempts"] == 3 + + + def test_from_dict(): + data = { + "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", + "init_parameters": { + "github_token": { + "type": "haystack.utils.Secret", + "token": "test_token" + }, + "raise_on_failure": False, + "retry_attempts": 3 + } + } + + viewer = GithubIssueViewer.from_dict(data) + + assert isinstance(viewer.github_token, Secret) + assert viewer.github_token.resolve_value() == "test_token" + assert 
viewer.raise_on_failure is False + assert viewer.retry_attempts == 3 + + + @patch("requests.get") + def test_run(mock_get): + """Test the run method.""" + # Mock the issue response + mock_get.return_value.json.return_value = { + "body": "Issue body", + "title": "Issue title", + "number": 123, + "state": "open", + "created_at": "2023-01-01T00:00:00Z", + "updated_at": "2023-01-02T00:00:00Z", + "user": {"login": "test_user"}, + "html_url": "https://github.com/owner/repo/issues/123", + "comments": 2, + "comments_url": "https://api.github.com/repos/owner/repo/issues/123/comments" + } + mock_get.return_value.raise_for_status.return_value = None + + # Mock the comments response + mock_get.side_effect = [ + mock_get.return_value, # First call for issue + type('Response', (), { + 'json': lambda: [ + { + "body": "Comment 1", + "created_at": "2023-01-01T01:00:00Z", + "updated_at": "2023-01-01T01:00:00Z", + "user": {"login": "commenter1"}, + "html_url": "https://github.com/owner/repo/issues/123#issuecomment-1" + }, + { + "body": "Comment 2", + "created_at": "2023-01-01T02:00:00Z", + "updated_at": "2023-01-01T02:00:00Z", + "user": {"login": "commenter2"}, + "html_url": "https://github.com/owner/repo/issues/123#issuecomment-2" + } + ], + 'raise_for_status': lambda: None + }) + ] + + token = Secret.from_token("test_token") + viewer = GithubIssueViewer(github_token=token) + + result = viewer.run(url="https://github.com/owner/repo/issues/123") + + assert len(result["documents"]) == 3 # 1 issue + 2 comments + assert result["documents"][0].meta["type"] == "issue" + assert result["documents"][1].meta["type"] == "comment" + assert result["documents"][2].meta["type"] == "comment" + + # Verify the API calls + assert mock_get.call_count == 2 + + + @patch("requests.get") + def test_run_error_handling(mock_get): + # Mock an error response + mock_get.side_effect = Exception("API Error") + + token = Secret.from_token("test_token") + viewer = GithubIssueViewer(github_token=token, 
raise_on_failure=False) + + result = viewer.run(url="https://github.com/owner/repo/issues/123") + + assert len(result["documents"]) == 1 + assert result["documents"][0].meta["type"] == "error" + assert result["documents"][0].meta["error"] is True + + # Test with raise_on_failure=True + viewer = GithubIssueViewer(github_token=token, raise_on_failure=True) + with pytest.raises(Exception): + viewer.run(url="https://github.com/owner/repo/issues/123") + + + def test_parse_github_url(self): + token = Secret.from_token("test_token") + viewer = GithubIssueViewer(github_token=token) + + owner, repo, issue_number = viewer._parse_github_url("https://github.com/owner/repo/issues/123") + assert owner == "owner" + assert repo == "repo" + assert issue_number == 123 + + # Test with invalid URL + with pytest.raises(ValueError): + viewer._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py new file mode 100644 index 0000000000..fee24056ce --- /dev/null +++ b/integrations/github/tests/test_pr_creator.py @@ -0,0 +1,115 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import Mock, patch + +import pytest +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.pr_creator import GithubPRCreator + + +class TestGithubPRCreator: + def test_init_default(self): + pr_creator = GithubPRCreator() + assert pr_creator.github_token is not None + assert pr_creator.raise_on_failure is True + + def test_init_with_parameters(self): + token = Secret.from_token("test_token") + pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + assert pr_creator.github_token == token + assert pr_creator.raise_on_failure is False + + # Test with invalid token type + with pytest.raises(TypeError): + GithubPRCreator(github_token="not_a_secret") + + + def test_to_dict(self): + token = 
Secret.from_token("test_token") + pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + + result = pr_creator.to_dict() + + assert result["type"] == "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator" + assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" + assert result["init_parameters"]["raise_on_failure"] is False + + + def test_from_dict(self): + data = { + "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", + "init_parameters": { + "github_token": { + "type": "haystack.utils.Secret", + "token": "test_token" + }, + "raise_on_failure": False + } + } + + pr_creator = GithubPRCreator.from_dict(data) + + assert isinstance(pr_creator.github_token, Secret) + assert pr_creator.github_token.resolve_value() == "test_token" + assert pr_creator.raise_on_failure is False + + + @patch("requests.get") + @patch("requests.post") + def test_run(self, mock_post, mock_get): + # Mock the authenticated user response + mock_get.return_value.json.return_value = {"login": "test_user"} + mock_get.return_value.raise_for_status.return_value = None + + # Mock the PR creation response + mock_post.return_value.json.return_value = {"number": 123} + mock_post.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + pr_creator = GithubPRCreator(github_token=token) + + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", + body="Test body", + draft=False + ) + + assert result["result"] == "Pull request #123 created successfully and linked to issue #456" + + # Verify the API calls + mock_get.assert_called_once() + mock_post.assert_called_once() + + + @patch("requests.get") + @patch("requests.post") + def test_run_error_handling(self, mock_post, mock_get): + # Mock an error response + mock_get.side_effect = Exception("API Error") + + token = 
Secret.from_token("test_token") + pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main" + ) + + assert "Error" in result["result"] + + # Test with raise_on_failure=True + pr_creator = GithubPRCreator(github_token=token, raise_on_failure=True) + with pytest.raises(Exception): + pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main" + ) diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py new file mode 100644 index 0000000000..db3fe57439 --- /dev/null +++ b/integrations/github/tests/test_repo_viewer.py @@ -0,0 +1,187 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +from haystack import Document +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.repo_viewer import GithubRepositoryViewer + +class TestGithubRepositoryViewer: + def test_init_default(self): + viewer = GithubRepositoryViewer() + assert viewer.github_token is None + assert viewer.raise_on_failure is True + assert viewer.max_file_size == 1_000_000 + assert viewer.repo is None + assert viewer.branch is None + + def test_init_with_parameters(self): + token = Secret.from_token("test_token") + viewer = GithubRepositoryViewer( + github_token=token, + raise_on_failure=False, + max_file_size=500_000, + repo="owner/repo", + branch="main" + ) + assert viewer.github_token == token + assert viewer.raise_on_failure is False + assert viewer.max_file_size == 500_000 + assert viewer.repo == "owner/repo" + assert viewer.branch == "main" + + # Test with invalid token type + with pytest.raises(TypeError): + GithubRepositoryViewer(github_token="not_a_secret") + + + def test_to_dict(self): + token 
= Secret.from_token("test_token") + viewer = GithubRepositoryViewer( + github_token=token, + raise_on_failure=False, + max_file_size=500_000 + ) + + result = viewer.to_dict() + + assert result["github_token"]["type"] == "haystack.utils.Secret" + assert result["raise_on_failure"] is False + assert result["max_file_size"] == 500_000 + + + def test_from_dict(self): + data = { + "github_token": { + "type": "haystack.utils.Secret", + "token": "test_token" + }, + "raise_on_failure": False, + "max_file_size": 500_000 + } + + viewer = GithubRepositoryViewer.from_dict(data) + + assert isinstance(viewer.github_token, Secret) + assert viewer.github_token.resolve_value() == "test_token" + assert viewer.raise_on_failure is False + assert viewer.max_file_size == 500_000 + + + @patch("requests.get") + def test_run_file(self, mock_get): + # Mock the file response + mock_get.return_value.json.return_value = { + "name": "README.md", + "path": "README.md", + "size": 100, + "html_url": "https://github.com/owner/repo/blob/main/README.md", + "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" + "encoding": "base64" + } + mock_get.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + viewer = GithubRepositoryViewer(github_token=token) + + result = viewer.run( + repo="owner/repo", + path="README.md", + branch="main" + ) + + assert len(result["documents"]) == 1 + assert result["documents"][0].content == "Hello World" + assert result["documents"][0].meta["type"] == "file_content" + assert result["documents"][0].meta["path"] == "README.md" + + # Verify the API call + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/README.md?ref=main", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepositoryViewer", "Authorization": "Bearer test_token"} + ) + + + @patch("requests.get") + def test_run_directory(self, mock_get): + # Mock the directory response + 
mock_get.return_value.json.return_value = [ + { + "name": "docs", + "path": "docs", + "type": "dir", + "html_url": "https://github.com/owner/repo/tree/main/docs" + }, + { + "name": "README.md", + "path": "README.md", + "type": "file", + "size": 100, + "html_url": "https://github.com/owner/repo/blob/main/README.md" + } + ] + mock_get.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + viewer = GithubRepositoryViewer(github_token=token) + + result = viewer.run( + repo="owner/repo", + path="", + branch="main" + ) + + assert len(result["documents"]) == 2 + assert result["documents"][0].content == "docs" + assert result["documents"][0].meta["type"] == "dir" + assert result["documents"][1].content == "README.md" + assert result["documents"][1].meta["type"] == "file" + + # Verify the API call + mock_get.assert_called_once_with( + "https://api.github.com/repos/owner/repo/contents/?ref=main", + headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepositoryViewer", "Authorization": "Bearer test_token"} + ) + + + @patch("requests.get") + def test_run_error_handling(self, mock_get): + # Mock an error response + mock_get.side_effect = Exception("API Error") + + token = Secret.from_token("test_token") + viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=False) + + result = viewer.run( + repo="owner/repo", + path="README.md", + branch="main" + ) + + assert len(result["documents"]) == 1 + assert result["documents"][0].meta["type"] == "error" + + # Test with raise_on_failure=True + viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=True) + with pytest.raises(Exception): + viewer.run( + repo="owner/repo", + path="README.md", + branch="main" + ) + + + def test_parse_repo(self): + token = Secret.from_token("test_token") + viewer = GithubRepositoryViewer(github_token=token) + + owner, repo = viewer._parse_repo("owner/repo") + assert owner == "owner" + assert repo == "repo" + + 
# Test with invalid format + with pytest.raises(ValueError): + viewer._parse_repo("invalid_format") diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py new file mode 100644 index 0000000000..62c3772e36 --- /dev/null +++ b/integrations/github/tests/test_repository_forker.py @@ -0,0 +1,197 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.repository_forker import GithubRepoForker + +class TestGithubRepoForker: + def test_init_default(self): + forker = GithubRepoForker() + assert forker.github_token is not None + assert forker.raise_on_failure is True + assert forker.wait_for_completion is False + assert forker.max_wait_seconds == 300 + assert forker.poll_interval == 2 + assert forker.auto_sync is True + assert forker.create_branch is True + + def test_init_with_parameters(self): + token = Secret.from_token("test_token") + forker = GithubRepoForker( + github_token=token, + raise_on_failure=False, + wait_for_completion=True, + max_wait_seconds=60, + poll_interval=1, + auto_sync=False, + create_branch=False + ) + assert forker.github_token == token + assert forker.raise_on_failure is False + assert forker.wait_for_completion is True + assert forker.max_wait_seconds == 60 + assert forker.poll_interval == 1 + assert forker.auto_sync is False + assert forker.create_branch is False + + # Test with invalid token type + with pytest.raises(TypeError): + GithubRepoForker(github_token="not_a_secret") + + + def test_to_dict(self): + token = Secret.from_token("test_token") + forker = GithubRepoForker( + github_token=token, + raise_on_failure=False, + wait_for_completion=True, + max_wait_seconds=60, + poll_interval=1, + auto_sync=False, + create_branch=False + ) + + result = forker.to_dict() + + assert result["type"] == 
"haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker" + assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" + assert result["init_parameters"]["raise_on_failure"] is False + assert result["init_parameters"]["wait_for_completion"] is True + assert result["init_parameters"]["max_wait_seconds"] == 60 + assert result["init_parameters"]["poll_interval"] == 1 + assert result["init_parameters"]["auto_sync"] is False + assert result["init_parameters"]["create_branch"] is False + + + def test_from_dict(self): + data = { + "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", + "init_parameters": { + "github_token": { + "type": "haystack.utils.Secret", + "token": "test_token" + }, + "raise_on_failure": False, + "wait_for_completion": True, + "max_wait_seconds": 60, + "poll_interval": 1, + "auto_sync": False, + "create_branch": False + } + } + + forker = GithubRepoForker.from_dict(data) + + assert isinstance(forker.github_token, Secret) + assert forker.github_token.resolve_value() == "test_token" + assert forker.raise_on_failure is False + assert forker.wait_for_completion is True + assert forker.max_wait_seconds == 60 + assert forker.poll_interval == 1 + assert forker.auto_sync is False + assert forker.create_branch is False + + + @patch("requests.get") + @patch("requests.post") + def test_run_create_fork(self, mock_post, mock_get): + # Mock the authenticated user response + mock_get.return_value.json.return_value = {"login": "test_user"} + mock_get.return_value.raise_for_status.return_value = None + + # Mock the fork creation response + mock_post.return_value.json.return_value = { + "owner": {"login": "test_user"}, + "name": "repo" + } + mock_post.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + forker = GithubRepoForker( + github_token=token, + create_branch=True, + auto_sync=False + ) + + result = 
forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "test_user/repo" + assert result["issue_branch"] == "fix-123" + + # Verify the API calls + mock_get.assert_called_once() + assert mock_post.call_count == 2 # One for fork creation, one for branch creation + + + @patch("requests.get") + @patch("requests.post") + def test_run_sync_existing_fork(self, mock_post, mock_get): + # Mock the authenticated user response + mock_get.side_effect = [ + type('Response', (), { + 'json': lambda: {"login": "test_user"}, + 'raise_for_status': lambda: None + }), + type('Response', (), { + 'status_code': 200, + 'json': lambda: {"name": "repo"}, + 'raise_for_status': lambda: None + }) + ] + + # Mock the sync response + mock_post.return_value.raise_for_status.return_value = None + + token = Secret.from_token("test_token") + forker = GithubRepoForker( + github_token=token, + create_branch=True, + auto_sync=True + ) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "test_user/repo" + assert result["issue_branch"] == "fix-123" + + # Verify the API calls + assert mock_get.call_count == 2 + assert mock_post.call_count == 2 # One for sync, one for branch creation + + + @patch("requests.get") + @patch("requests.post") + def test_run_error_handling(self, mock_post, mock_get): + # Mock an error response + mock_get.side_effect = Exception("API Error") + + token = Secret.from_token("test_token") + forker = GithubRepoForker(github_token=token, raise_on_failure=False) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "" + assert result["issue_branch"] is None + + # Test with raise_on_failure=True + forker = GithubRepoForker(github_token=token, raise_on_failure=True) + with pytest.raises(Exception): + forker.run(url="https://github.com/owner/repo/issues/123") + + + def test_parse_github_url(self): + token = Secret.from_token("test_token") + forker = 
GithubRepoForker(github_token=token) + + owner, repo, issue_number = forker._parse_github_url("https://github.com/owner/repo/issues/123") + assert owner == "owner" + assert repo == "repo" + assert issue_number == "123" + + # Test with invalid URL + with pytest.raises(ValueError): + forker._parse_github_url("https://github.com/invalid/url") From 2e2202a3a9cc34491a4d73605a0b72005df0e22f Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 09:24:42 +0200 Subject: [PATCH 06/51] add workflow --- .github/workflows/github.yml | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 .github/workflows/github.yml diff --git a/.github/workflows/github.yml b/.github/workflows/github.yml new file mode 100644 index 0000000000..192857e978 --- /dev/null +++ b/.github/workflows/github.yml @@ -0,0 +1,75 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / github + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/github/**" + - "!integrations/github/*.md" + - ".github/workflows/github.yml" + +defaults: + run: + working-directory: integrations/github + +concurrency: + group: github-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ["3.9", "3.13"] + + steps: + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . 
+ run: git config --system core.longpaths true + + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install --upgrade hatch + + - name: Lint + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run lint:all + + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + + - name: Run tests + run: hatch run cov-retry + + - name: Nightly - run unit tests with Haystack main branch + if: github.event_name == 'schedule' + run: | + hatch run uv pip install git+https://github.com/deepset-ai/haystack.git@main + hatch run cov-retry -m "not integration" + + - name: Send event to Datadog for nightly failures + if: failure() && github.event_name == 'schedule' + uses: ./.github/actions/send_failure + with: + title: | + Core integrations nightly tests failure: ${{ github.workflow }} + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} From fcfec47b2ab1faa5e6fa3440df8d3c54bc4b7ce1 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 10:00:35 +0200 Subject: [PATCH 07/51] fmt --- .../components/connectors/file_editor.py | 57 ++--- .../components/connectors/issue_commenter.py | 12 +- .../components/connectors/issue_viewer.py | 14 +- .../components/connectors/pr_creator.py | 25 +-- .../components/connectors/repo_viewer.py | 21 +- .../connectors/repository_forker.py | 49 ++--- .../components/prompts/comment_tool.py | 7 +- .../components/prompts/file_editor_tool.py | 46 ++-- .../components/prompts/repo_viewer_tool.py | 9 +- integrations/github/tests/test_file_editor.py | 203 ++++++++---------- .../github/tests/test_issue_commenter.py | 82 +++---- .../github/tests/test_issue_viewer.py | 101 ++++----- integrations/github/tests/test_pr_creator.py | 48 ++--- integrations/github/tests/test_repo_viewer.py | 111 ++++------ 
.../github/tests/test_repository_forker.py | 90 +++----- 15 files changed, 340 insertions(+), 535 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py index cfacfa213e..12cd41c579 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py @@ -8,6 +8,7 @@ logger = logging.getLogger(__name__) + class Command(StrEnum): """ Available commands for file operations in GitHub. @@ -18,11 +19,13 @@ class Command(StrEnum): CREATE: Create a new file DELETE: Delete an existing file """ + EDIT = "edit" UNDO = "undo" CREATE = "create" DELETE = "delete" + @component class GithubFileEditor: """ @@ -74,7 +77,7 @@ def __init__( github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), repo: Optional[str] = None, branch: str = "main", - raise_on_failure: bool = True + raise_on_failure: bool = True, ): """ Initialize the component. 
@@ -95,7 +98,7 @@ def __init__( self.headers = { "Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {self.github_token.resolve_value()}", - "User-Agent": "Haystack/GithubFileEditor" + "User-Agent": "Haystack/GithubFileEditor", } def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]: @@ -107,23 +110,14 @@ def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tu content = b64decode(data["content"]).decode("utf-8") return content, data["sha"] - def _update_file( - self, - owner: str, - repo: str, - path: str, - content: str, - message: str, - sha: str, - branch: str - ) -> bool: + def _update_file(self, owner: str, repo: str, path: str, content: str, message: str, sha: str, branch: str) -> bool: """Update file content on GitHub.""" url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" payload = { "message": message, "content": b64encode(content.encode("utf-8")).decode("utf-8"), "sha": sha, - "branch": branch + "branch": branch, } response = requests.put(url, headers=self.headers, json=payload) response.raise_for_status() @@ -158,15 +152,13 @@ def _edit_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str # Perform the replacement new_content = content.replace(payload["original"], payload["replacement"]) - success = self._update_file( - owner, repo, payload["path"], new_content, payload["message"], sha, branch - ) + success = self._update_file(owner, repo, payload["path"], new_content, payload["message"], sha, branch) return "Edit successful" if success else "Edit failed" except requests.RequestException as e: if self.raise_on_failure: raise - return f"Error: {str(e)}" + return f"Error: {e!s}" def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: """Handle undoing changes.""" @@ -179,11 +171,7 @@ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: commits_url = 
f"https://api.github.com/repos/{owner}/{repo}/commits" # Get the previous commit SHA - commits = requests.get( - commits_url, - headers=self.headers, - params={"per_page": 2, "sha": branch} - ).json() + commits = requests.get(commits_url, headers=self.headers, params={"per_page": 2, "sha": branch}).json() previous_sha = commits[1]["sha"] # Update branch reference to previous commit @@ -196,7 +184,7 @@ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: except requests.RequestException as e: if self.raise_on_failure: raise - return f"Error: {str(e)}" + return f"Error: {e!s}" def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: """Handle file creation.""" @@ -204,11 +192,7 @@ def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}" content = b64encode(payload["content"].encode("utf-8")).decode("utf-8") - data = { - "message": payload["message"], - "content": content, - "branch": branch - } + data = {"message": payload["message"], "content": content, "branch": branch} response = requests.put(url, headers=self.headers, json=data) response.raise_for_status() @@ -217,7 +201,7 @@ def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s except requests.RequestException as e: if self.raise_on_failure: raise - return f"Error: {str(e)}" + return f"Error: {e!s}" def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: """Handle file deletion.""" @@ -225,11 +209,7 @@ def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s content, sha = self._get_file_content(owner, repo, payload["path"], branch) url = f"https://api.github.com/repos/{owner}/{repo}/contents/{payload['path']}" - data = { - "message": payload["message"], - "sha": sha, - "branch": branch - } + data = {"message": payload["message"], "sha": sha, "branch": branch} 
response = requests.delete(url, headers=self.headers, json=data) response.raise_for_status() @@ -238,7 +218,7 @@ def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s except requests.RequestException as e: if self.raise_on_failure: raise - return f"Error: {str(e)}" + return f"Error: {e!s}" @component.output_types(result=str) def run( @@ -246,7 +226,7 @@ def run( command: Union[Command, str], payload: Dict[str, Any], repo: Optional[str] = None, - branch: Optional[str] = None + branch: Optional[str] = None, ) -> Dict[str, str]: """ Process GitHub file operations. @@ -271,7 +251,7 @@ def run( Command.EDIT: self._edit_file, Command.UNDO: self._undo_changes, Command.CREATE: self._create_file, - Command.DELETE: self._delete_file + Command.DELETE: self._delete_file, } if command not in command_handlers: @@ -287,7 +267,7 @@ def to_dict(self) -> Dict[str, Any]: github_token=self.github_token.to_dict() if self.github_token else None, repo=self.default_repo, branch=self.default_branch, - raise_on_failure=self.raise_on_failure + raise_on_failure=self.raise_on_failure, ) @classmethod @@ -296,4 +276,3 @@ def from_dict(cls, data: Dict[str, Any]) -> "GithubFileEditor": init_params = data["init_parameters"] deserialize_secrets_inplace(init_params, keys=["github_token"]) return default_from_dict(cls, data) - diff --git a/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py index f564eb4e7a..d6b767afd1 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py @@ -32,10 +32,10 @@ class GithubIssueCommenter: """ def __init__( - self, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - raise_on_failure: bool = True, - retry_attempts: int = 2, + self, + github_token: Secret = 
Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + retry_attempts: int = 2, ): """ Initialize the component. @@ -103,7 +103,7 @@ def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) except requests.exceptions.RequestException as e: if attempt == self.retry_attempts - 1: raise - logger.warning(f"Attempt {attempt + 1} failed: {str(e)}. Retrying...") + logger.warning(f"Attempt {attempt + 1} failed: {e!s}. Retrying...") return False @@ -150,6 +150,6 @@ def run(self, url: str, comment: str) -> dict: if self.raise_on_failure: raise - error_message = f"Error posting comment to GitHub issue {url}: {str(e)}" + error_message = f"Error posting comment to GitHub issue {url}: {e!s}" logger.warning(error_message) return {"success": False} diff --git a/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py index 89fdaea771..224c30fa30 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py @@ -128,9 +128,7 @@ def _create_issue_document(self, issue_data: dict) -> Document: }, ) - def _create_comment_document( - self, comment_data: dict, issue_number: int - ) -> Document: + def _create_comment_document(self, comment_data: dict, issue_number: int) -> Document: """ Create a Document from comment data. 
@@ -193,10 +191,7 @@ def run(self, url: str) -> dict: # Fetch and process comments if they exist if issue_data["comments"] > 0: comments = self._fetch_comments(issue_data["comments_url"]) - documents.extend( - self._create_comment_document(comment, issue_number) - for comment in comments - ) + documents.extend(self._create_comment_document(comment, issue_number) for comment in comments) return {"documents": documents} @@ -204,7 +199,7 @@ def run(self, url: str) -> dict: if self.raise_on_failure: raise - error_message = f"Error processing GitHub issue {url}: {str(e)}" + error_message = f"Error processing GitHub issue {url}: {e!s}" logger.warning(error_message) error_doc = Document( content=error_message, @@ -212,7 +207,6 @@ def run(self, url: str) -> dict: "error": True, "type": "error", "url": url, - } + }, ) return {"documents": [error_doc]} - diff --git a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py index d27d8cb064..ecf30448df 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py @@ -35,11 +35,7 @@ class GithubPRCreator: ``` """ - def __init__( - self, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - raise_on_failure: bool = True - ): + def __init__(self, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True): """ Initialize the component. 
@@ -61,7 +57,7 @@ def _get_headers(self) -> Dict[str, str]: return { "Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {self.github_token.resolve_value()}", - "User-Agent": "Haystack/GithubPRCreator" + "User-Agent": "Haystack/GithubPRCreator", } def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: @@ -80,10 +76,7 @@ def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: def _get_authenticated_user(self) -> str: """Get the username of the authenticated user (fork owner).""" - response = requests.get( - "https://api.github.com/user", - headers=self._get_headers() - ) + response = requests.get("https://api.github.com/user", headers=self._get_headers()) response.raise_for_status() return response.json()["login"] @@ -100,13 +93,7 @@ def _check_fork_exists(self, owner: str, repo: str, fork_owner: str) -> bool: @component.output_types(result=str) def run( - self, - issue_url: str, - title: str, - branch: str, - base: str, - body: str = "", - draft: bool = False + self, issue_url: str, title: str, branch: str, base: str, body: str = "", draft: bool = False ) -> Dict[str, str]: """ Create a new pull request from your fork to the original repository, linked to the specified issue. 
@@ -153,14 +140,14 @@ def run( except (requests.RequestException, ValueError) as e: if self.raise_on_failure: raise - return {"result": f"Error: {str(e)}"} + return {"result": f"Error: {e!s}"} def to_dict(self) -> Dict[str, Any]: """Serialize the component to a dictionary.""" return default_to_dict( self, github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure + raise_on_failure=self.raise_on_failure, ) @classmethod diff --git a/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py index c2b9b2a018..000bd41caa 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py @@ -74,7 +74,7 @@ def __init__( raise_on_failure: bool = True, max_file_size: int = 1_000_000, # 1MB default limit repo: Optional[str] = None, - branch: Optional[str] = None + branch: Optional[str] = None, ): """ Initialize the component. @@ -126,9 +126,7 @@ def _parse_repo(self, repo: str) -> tuple[str, str]: """Parse owner/repo string""" parts = repo.split("/") if len(parts) != 2: - raise ValueError( - f"Invalid repository format. Expected 'owner/repo', got '{repo}'" - ) + raise ValueError(f"Invalid repository format. Expected 'owner/repo', got '{repo}'") return parts[0], parts[1] def _normalize_path(self, path: str) -> str: @@ -193,9 +191,7 @@ def _create_error_document(self, error: Exception, path: str) -> Document: ) @component.output_types(documents=List[Document]) - def run( - self, path: str, repo: Optional[str] = None, branch: Optional[str] = None - ) -> Dict[str, List[Document]]: + def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = None) -> Dict[str, List[Document]]: """ Process a GitHub repository path and return documents. 
@@ -218,9 +214,7 @@ def run( # Handle single file response if not isinstance(contents, list): if contents.get("size", 0) > self.max_file_size: - raise ValueError( - f"File size {contents['size']} exceeds limit of {self.max_file_size}" - ) + raise ValueError(f"File size {contents['size']} exceeds limit of {self.max_file_size}") item = GitHubItem( name=contents["name"], @@ -228,9 +222,7 @@ def run( path=contents["path"], size=contents["size"], url=contents["html_url"], - content=self._process_file_content( - contents["content"], contents["encoding"] - ), + content=self._process_file_content(contents["content"], contents["encoding"]), ) return {"documents": [self._create_file_document(item)]} @@ -250,7 +242,7 @@ def run( except Exception as e: error_doc = self._create_error_document( - f"Error processing repository path {path}: {str(e)}. Seems like the file does not exist.", path + f"Error processing repository path {path}: {e!s}. Seems like the file does not exist.", path ) if self.raise_on_failure: raise @@ -260,4 +252,3 @@ def run( error=str(e), ) return {"documents": [error_doc]} - diff --git a/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py index 9dabf8bba5..3759785fb0 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py @@ -34,14 +34,14 @@ class GithubRepoForker: """ def __init__( - self, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - raise_on_failure: bool = True, - wait_for_completion: bool = False, - max_wait_seconds: int = 300, - poll_interval: int = 2, - auto_sync: bool = True, - create_branch: bool = True, + self, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + wait_for_completion: bool = False, + max_wait_seconds: int = 
300, + poll_interval: int = 2, + auto_sync: bool = True, + create_branch: bool = True, ): """ Initialize the component. @@ -65,10 +65,7 @@ def __init__( self.auto_sync = auto_sync self.create_branch = create_branch - self.headers = { - "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubRepoForker" - } + self.headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepoForker"} def _parse_github_url(self, url: str) -> tuple[str, str, str]: """ @@ -96,8 +93,7 @@ def _check_fork_status(self, fork_path: str) -> bool: url = f"https://api.github.com/repos/{fork_path}" try: response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} ) return response.status_code == 200 except requests.RequestException: @@ -112,8 +108,7 @@ def _get_authenticated_user(self) -> str: """ url = "https://api.github.com/user" response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} ) response.raise_for_status() return response.json()["login"] @@ -128,14 +123,13 @@ def _get_existing_repository(self, repo_name: str) -> Optional[str]: url = f"https://api.github.com/repos/{self._get_authenticated_user()}/{repo_name}" try: response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} ) if response.status_code == 200: return repo_name return None except requests.RequestException as e: - logger.warning(f"Failed to check repository existence: {str(e)}") + logger.warning(f"Failed to check repository existence: {e!s}") return None def _sync_fork(self, fork_path: str) -> None: @@ 
-149,7 +143,7 @@ def _sync_fork(self, fork_path: str) -> None: response = requests.post( url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - json={"branch": "main"} + json={"branch": "main"}, ) response.raise_for_status() @@ -164,8 +158,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # First, get the default branch SHA url = f"https://api.github.com/repos/{fork_path}" response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} ) response.raise_for_status() default_branch = response.json()["default_branch"] @@ -173,8 +166,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # Get the SHA of the default branch url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} ) response.raise_for_status() sha = response.json()["object"]["sha"] @@ -185,10 +177,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: response = requests.post( url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - json={ - "ref": f"refs/heads/{branch_name}", - "sha": sha - } + json={"ref": f"refs/heads/{branch_name}", "sha": sha}, ) response.raise_for_status() @@ -203,8 +192,7 @@ def _create_fork(self, owner: str, repo: str) -> str: """ url = f"https://api.github.com/repos/{owner}/{repo}/forks" response = requests.post( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} ) 
response.raise_for_status() @@ -268,6 +256,7 @@ def run(self, url: str) -> dict: # Wait for fork completion if requested if self.wait_for_completion: import time + start_time = time.time() while time.time() - start_time < self.max_wait_seconds: diff --git a/integrations/github/src/haystack_integrations/components/prompts/comment_tool.py b/integrations/github/src/haystack_integrations/components/prompts/comment_tool.py index 27baf46e5c..015bb90b72 100644 --- a/integrations/github/src/haystack_integrations/components/prompts/comment_tool.py +++ b/integrations/github/src/haystack_integrations/components/prompts/comment_tool.py @@ -12,11 +12,8 @@ comment_schema = { "properties": { - "comment": { - "type": "string", - "description": "The contents of the comment that you want to create." - } + "comment": {"type": "string", "description": "The contents of the comment that you want to create."} }, "required": ["comment"], - "type": "object" + "type": "object", } diff --git a/integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py b/integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py index 61ac77b0d5..ba8c88f167 100644 --- a/integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py @@ -97,34 +97,24 @@ """ file_editor_schema = { - "type": "object", - "properties": { - "command": { - "type": "string", - "enum": ["edit", "create", "delete", "undo"], - "description": "The command to execute" - }, - "payload": { - "type": "object", - "required": ["message"], - "properties": { - "message": { - "type": "string" - }, - "content": { - "type": "string" + "type": "object", + "properties": { + "command": { + "type": "string", + "enum": ["edit", "create", "delete", "undo"], + "description": "The command to execute", }, - "path": { - "type": "string" + "payload": { + "type": "object", + "required": ["message"], 
+ "properties": { + "message": {"type": "string"}, + "content": {"type": "string"}, + "path": {"type": "string"}, + "original": {"type": "string"}, + "replacement": {"type": "string"}, + }, }, - "original": { - "type": "string" - }, - "replacement": { - "type": "string" - } - } - } - }, - "required": ["command", "payload"] + }, + "required": ["command", "payload"], } diff --git a/integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py index df9cad616b..cfeae5347c 100644 --- a/integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py @@ -64,15 +64,12 @@ repo_viewer_schema = { "properties": { - "repo": { - "type": "string", - "description": "The owner/repository_name that you want to view." - }, + "repo": {"type": "string", "description": "The owner/repository_name that you want to view."}, "path": { "type": "string", "description": "Path to directory or file to view. 
Defaults to repository root.", - } + }, }, "required": ["repo"], - "type": "object" + "type": "object", } diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py index b6e4aa7d6a..16a1b5bf6a 100644 --- a/integrations/github/tests/test_file_editor.py +++ b/integrations/github/tests/test_file_editor.py @@ -6,7 +6,7 @@ import pytest from haystack.utils import Secret -from haystack_integrations.components.connectors.github.file_editor import GithubFileEditor, Command +from haystack_integrations.components.connectors.github.file_editor import Command, GithubFileEditor class TestGithubFileEditor: @@ -19,12 +19,7 @@ def test_init_default(self): def test_init_with_parameters(self): token = Secret.from_token("test_token") - editor = GithubFileEditor( - github_token=token, - repo="owner/repo", - branch="feature", - raise_on_failure=False - ) + editor = GithubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) assert editor.github_token == token assert editor.default_repo == "owner/repo" assert editor.default_branch == "feature" @@ -34,81 +29,69 @@ def test_init_with_parameters(self): with pytest.raises(TypeError): GithubFileEditor(github_token="not_a_secret") - def test_to_dict(self): token = Secret.from_token("test_token") - editor = GithubFileEditor( - github_token=token, - repo="owner/repo", - branch="feature", - raise_on_failure=False - ) - + editor = GithubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) + result = editor.to_dict() - + assert result["github_token"]["type"] == "haystack.utils.Secret" assert result["repo"] == "owner/repo" assert result["branch"] == "feature" assert result["raise_on_failure"] is False - def test_from_dict(self): data = { - "github_token": { - "type": "haystack.utils.Secret", - "token": "test_token" - }, + "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, "repo": "owner/repo", 
"branch": "feature", - "raise_on_failure": False + "raise_on_failure": False, } - + editor = GithubFileEditor.from_dict(data) - + assert isinstance(editor.github_token, Secret) assert editor.github_token.resolve_value() == "test_token" assert editor.default_repo == "owner/repo" assert editor.default_branch == "feature" assert editor.raise_on_failure is False - @patch("requests.get") @patch("requests.put") def test_run_edit(self, mock_put, mock_get): # Mock the file content response mock_get.return_value.json.return_value = { "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" - "sha": "abc123" + "sha": "abc123", } mock_get.return_value.raise_for_status.return_value = None - + # Mock the update response mock_put.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") editor = GithubFileEditor(github_token=token) - + result = editor.run( command=Command.EDIT, - payload={ - "path": "test.txt", - "original": "Hello", - "replacement": "Hi", - "message": "Update greeting" - }, + payload={"path": "test.txt", "original": "Hello", "replacement": "Hi", "message": "Update greeting"}, repo="owner/repo", - branch="main" + branch="main", ) - + assert result["result"] == "Edit successful" - + # Verify the API calls mock_get.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/test.txt", - headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, - params={"ref": "main"} + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubFileEditor", + "Authorization": "Bearer test_token", + }, + params={"ref": "main"}, ) - + mock_put.assert_called_once() put_call = mock_put.call_args assert put_call[0][0] == "https://api.github.com/repos/owner/repo/contents/test.txt" @@ -116,157 +99,147 @@ def test_run_edit(self, mock_put, mock_get): assert put_call[1]["json"]["sha"] == "abc123" assert 
put_call[1]["json"]["branch"] == "main" - @patch("requests.get") @patch("requests.patch") def test_run_undo(self, mock_patch, mock_get): # Mock the user check response - mock_get.return_value.json.return_value = { - "login": "testuser" - } + mock_get.return_value.json.return_value = {"login": "testuser"} mock_get.return_value.raise_for_status.return_value = None - + # Mock the commits response mock_get.side_effect = [ - type("Response", (), {"json": lambda: [{"author": {"login": "testuser"}}], "raise_for_status": lambda: None}), - type("Response", (), {"json": lambda: [{"sha": "abc123"}, {"sha": "def456"}], "raise_for_status": lambda: None}) + type( + "Response", (), {"json": lambda: [{"author": {"login": "testuser"}}], "raise_for_status": lambda: None} + ), + type( + "Response", + (), + {"json": lambda: [{"sha": "abc123"}, {"sha": "def456"}], "raise_for_status": lambda: None}, + ), ] - + # Mock the update response mock_patch.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") editor = GithubFileEditor(github_token=token) - + result = editor.run( - command=Command.UNDO, - payload={"message": "Undo last change"}, - repo="owner/repo", - branch="main" + command=Command.UNDO, payload={"message": "Undo last change"}, repo="owner/repo", branch="main" ) - + assert result["result"] == "Successfully undid last change" - + # Verify the API calls assert mock_get.call_count == 3 mock_patch.assert_called_once_with( "https://api.github.com/repos/owner/repo/git/refs/heads/main", - headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, - json={"sha": "def456", "force": True} + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubFileEditor", + "Authorization": "Bearer test_token", + }, + json={"sha": "def456", "force": True}, ) - @patch("requests.put") def test_run_create(self, mock_put): # Mock the create response 
mock_put.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") editor = GithubFileEditor(github_token=token) - + result = editor.run( command=Command.CREATE, - payload={ - "path": "new.txt", - "content": "New file content", - "message": "Create new file" - }, + payload={"path": "new.txt", "content": "New file content", "message": "Create new file"}, repo="owner/repo", - branch="main" + branch="main", ) - + assert result["result"] == "File created successfully" - + # Verify the API call mock_put.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/new.txt", - headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubFileEditor", + "Authorization": "Bearer test_token", + }, json={ "message": "Create new file", "content": "TmV3IGZpbGUgY29udGVudA==", # Base64 encoded "New file content" - "branch": "main" - } + "branch": "main", + }, ) - @patch("requests.get") @patch("requests.delete") def test_run_delete(self, mock_delete, mock_get): # Mock the file content response - mock_get.return_value.json.return_value = { - "sha": "abc123" - } + mock_get.return_value.json.return_value = {"sha": "abc123"} mock_get.return_value.raise_for_status.return_value = None - + # Mock the delete response mock_delete.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") editor = GithubFileEditor(github_token=token) - + result = editor.run( command=Command.DELETE, - payload={ - "path": "test.txt", - "message": "Delete file" - }, + payload={"path": "test.txt", "message": "Delete file"}, repo="owner/repo", - branch="main" + branch="main", ) - + assert result["result"] == "File deleted successfully" - + # Verify the API calls mock_get.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/test.txt", - 
headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, - params={"ref": "main"} + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubFileEditor", + "Authorization": "Bearer test_token", + }, + params={"ref": "main"}, ) - + mock_delete.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/test.txt", - headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", "Authorization": "Bearer test_token"}, - json={ - "message": "Delete file", - "sha": "abc123", - "branch": "main" - } + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubFileEditor", + "Authorization": "Bearer test_token", + }, + json={"message": "Delete file", "sha": "abc123", "branch": "main"}, ) - @patch("requests.get") def test_run_error_handling(self, mock_get): # Mock an error response mock_get.side_effect = Exception("API Error") - + token = Secret.from_token("test_token") editor = GithubFileEditor(github_token=token, raise_on_failure=False) - + result = editor.run( command=Command.EDIT, - payload={ - "path": "test.txt", - "original": "Hello", - "replacement": "Hi", - "message": "Update greeting" - }, + payload={"path": "test.txt", "original": "Hello", "replacement": "Hi", "message": "Update greeting"}, repo="owner/repo", - branch="main" + branch="main", ) - + assert "Error: API Error" in result["result"] - + # Test with raise_on_failure=True editor = GithubFileEditor(github_token=token, raise_on_failure=True) with pytest.raises(Exception): editor.run( command=Command.EDIT, - payload={ - "path": "test.txt", - "original": "Hello", - "replacement": "Hi", - "message": "Update greeting" - }, + payload={"path": "test.txt", "original": "Hello", "replacement": "Hi", "message": "Update greeting"}, repo="owner/repo", - branch="main" + branch="main", ) diff --git 
a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py index 0def098eea..db89fff8ae 100644 --- a/integrations/github/tests/test_issue_commenter.py +++ b/integrations/github/tests/test_issue_commenter.py @@ -8,6 +8,7 @@ from haystack_integrations.components.connectors.github.issue_commenter import GithubIssueCommenter + class TestGithubIssueCommenter: def test_init_default(self): commenter = GithubIssueCommenter() @@ -17,110 +18,91 @@ def test_init_default(self): def test_init_with_parameters(self): token = Secret.from_token("test_token") - commenter = GithubIssueCommenter( - github_token=token, - raise_on_failure=False, - retry_attempts=3 - ) + commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) assert commenter.github_token == token assert commenter.raise_on_failure is False assert commenter.retry_attempts == 3 - def test_to_dict(self): token = Secret.from_token("test_token") - commenter = GithubIssueCommenter( - github_token=token, - raise_on_failure=False, - retry_attempts=3 - ) - + commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) + result = commenter.to_dict() - - assert result["type"] == "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter" + + assert ( + result["type"] == "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter" + ) assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" assert result["init_parameters"]["raise_on_failure"] is False assert result["init_parameters"]["retry_attempts"] == 3 - def test_from_dict(self): data = { "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", "init_parameters": { - "github_token": { - "type": "haystack.utils.Secret", - "token": "test_token" - }, + "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, 
"raise_on_failure": False, - "retry_attempts": 3 - } + "retry_attempts": 3, + }, } - + commenter = GithubIssueCommenter.from_dict(data) - + assert isinstance(commenter.github_token, Secret) assert commenter.github_token.resolve_value() == "test_token" assert commenter.raise_on_failure is False assert commenter.retry_attempts == 3 - @patch("requests.post") def test_run(self, mock_post): """Test the run method.""" # Mock the successful response mock_post.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") commenter = GithubIssueCommenter(github_token=token) - - result = commenter.run( - url="https://github.com/owner/repo/issues/123", - comment="Test comment" - ) - + + result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") + assert result["success"] is True - + # Verify the API call mock_post.assert_called_once_with( "https://api.github.com/repos/owner/repo/issues/123/comments", - headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubIssueCommenter", "Authorization": "Bearer test_token"}, - json={"body": "Test comment"} + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubIssueCommenter", + "Authorization": "Bearer test_token", + }, + json={"body": "Test comment"}, ) - @patch("requests.post") def test_run_error_handling(self, mock_post): # Mock an error response mock_post.side_effect = Exception("API Error") - + token = Secret.from_token("test_token") commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False) - - result = commenter.run( - url="https://github.com/owner/repo/issues/123", - comment="Test comment" - ) - + + result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") + assert result["success"] is False - + # Test with raise_on_failure=True commenter = GithubIssueCommenter(github_token=token, raise_on_failure=True) with pytest.raises(Exception): - 
commenter.run( - url="https://github.com/owner/repo/issues/123", - comment="Test comment" - ) - + commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") def test_parse_github_url(self): token = Secret.from_token("test_token") commenter = GithubIssueCommenter(github_token=token) - + owner, repo, issue_number = commenter._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" assert repo == "repo" assert issue_number == 123 - + # Test with invalid URL with pytest.raises(ValueError): commenter._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_issue_viewer.py b/integrations/github/tests/test_issue_viewer.py index 9933352cc2..3879cbb6dc 100644 --- a/integrations/github/tests/test_issue_viewer.py +++ b/integrations/github/tests/test_issue_viewer.py @@ -9,6 +9,7 @@ from haystack_integrations.components.connectors.github.issue_viewer import GithubIssueViewer + class TestGithubIssueViewer: def test_init_default(self): viewer = GithubIssueViewer() @@ -18,53 +19,39 @@ def test_init_default(self): def test_init_with_parameters(self): token = Secret.from_token("test_token") - viewer = GithubIssueViewer( - github_token=token, - raise_on_failure=False, - retry_attempts=3 - ) + viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) assert viewer.github_token == token assert viewer.raise_on_failure is False assert viewer.retry_attempts == 3 - def test_to_dict(self): token = Secret.from_token("test_token") - viewer = GithubIssueViewer( - github_token=token, - raise_on_failure=False, - retry_attempts=3 - ) - + viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) + result = viewer.to_dict() - + assert result["type"] == "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer" assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" assert 
result["init_parameters"]["raise_on_failure"] is False assert result["init_parameters"]["retry_attempts"] == 3 - def test_from_dict(): data = { "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", "init_parameters": { - "github_token": { - "type": "haystack.utils.Secret", - "token": "test_token" - }, + "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, "raise_on_failure": False, - "retry_attempts": 3 - } + "retry_attempts": 3, + }, } - + viewer = GithubIssueViewer.from_dict(data) - + assert isinstance(viewer.github_token, Secret) assert viewer.github_token.resolve_value() == "test_token" assert viewer.raise_on_failure is False assert viewer.retry_attempts == 3 - @patch("requests.get") def test_run(mock_get): """Test the run method.""" @@ -79,77 +66,79 @@ def test_run(mock_get): "user": {"login": "test_user"}, "html_url": "https://github.com/owner/repo/issues/123", "comments": 2, - "comments_url": "https://api.github.com/repos/owner/repo/issues/123/comments" + "comments_url": "https://api.github.com/repos/owner/repo/issues/123/comments", } mock_get.return_value.raise_for_status.return_value = None - + # Mock the comments response mock_get.side_effect = [ mock_get.return_value, # First call for issue - type('Response', (), { - 'json': lambda: [ - { - "body": "Comment 1", - "created_at": "2023-01-01T01:00:00Z", - "updated_at": "2023-01-01T01:00:00Z", - "user": {"login": "commenter1"}, - "html_url": "https://github.com/owner/repo/issues/123#issuecomment-1" - }, - { - "body": "Comment 2", - "created_at": "2023-01-01T02:00:00Z", - "updated_at": "2023-01-01T02:00:00Z", - "user": {"login": "commenter2"}, - "html_url": "https://github.com/owner/repo/issues/123#issuecomment-2" - } - ], - 'raise_for_status': lambda: None - }) + type( + "Response", + (), + { + "json": lambda: [ + { + "body": "Comment 1", + "created_at": "2023-01-01T01:00:00Z", + "updated_at": "2023-01-01T01:00:00Z", + "user": {"login": 
"commenter1"}, + "html_url": "https://github.com/owner/repo/issues/123#issuecomment-1", + }, + { + "body": "Comment 2", + "created_at": "2023-01-01T02:00:00Z", + "updated_at": "2023-01-01T02:00:00Z", + "user": {"login": "commenter2"}, + "html_url": "https://github.com/owner/repo/issues/123#issuecomment-2", + }, + ], + "raise_for_status": lambda: None, + }, + ), ] - + token = Secret.from_token("test_token") viewer = GithubIssueViewer(github_token=token) - + result = viewer.run(url="https://github.com/owner/repo/issues/123") - + assert len(result["documents"]) == 3 # 1 issue + 2 comments assert result["documents"][0].meta["type"] == "issue" assert result["documents"][1].meta["type"] == "comment" assert result["documents"][2].meta["type"] == "comment" - + # Verify the API calls assert mock_get.call_count == 2 - @patch("requests.get") def test_run_error_handling(mock_get): # Mock an error response mock_get.side_effect = Exception("API Error") - + token = Secret.from_token("test_token") viewer = GithubIssueViewer(github_token=token, raise_on_failure=False) - + result = viewer.run(url="https://github.com/owner/repo/issues/123") - + assert len(result["documents"]) == 1 assert result["documents"][0].meta["type"] == "error" assert result["documents"][0].meta["error"] is True - + # Test with raise_on_failure=True viewer = GithubIssueViewer(github_token=token, raise_on_failure=True) with pytest.raises(Exception): viewer.run(url="https://github.com/owner/repo/issues/123") - def test_parse_github_url(self): token = Secret.from_token("test_token") viewer = GithubIssueViewer(github_token=token) - + owner, repo, issue_number = viewer._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" assert repo == "repo" assert issue_number == 123 - + # Test with invalid URL with pytest.raises(ValueError): viewer._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_pr_creator.py 
b/integrations/github/tests/test_pr_creator.py index fee24056ce..720f97abc6 100644 --- a/integrations/github/tests/test_pr_creator.py +++ b/integrations/github/tests/test_pr_creator.py @@ -25,85 +25,75 @@ def test_init_with_parameters(self): with pytest.raises(TypeError): GithubPRCreator(github_token="not_a_secret") - def test_to_dict(self): token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) - + result = pr_creator.to_dict() - + assert result["type"] == "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator" assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" assert result["init_parameters"]["raise_on_failure"] is False - def test_from_dict(self): data = { "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", "init_parameters": { - "github_token": { - "type": "haystack.utils.Secret", - "token": "test_token" - }, - "raise_on_failure": False - } + "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, + "raise_on_failure": False, + }, } - + pr_creator = GithubPRCreator.from_dict(data) - + assert isinstance(pr_creator.github_token, Secret) assert pr_creator.github_token.resolve_value() == "test_token" assert pr_creator.raise_on_failure is False - @patch("requests.get") @patch("requests.post") def test_run(self, mock_post, mock_get): # Mock the authenticated user response mock_get.return_value.json.return_value = {"login": "test_user"} mock_get.return_value.raise_for_status.return_value = None - + # Mock the PR creation response mock_post.return_value.json.return_value = {"number": 123} mock_post.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token) - + result = pr_creator.run( issue_url="https://github.com/owner/repo/issues/456", title="Test PR", branch="feature-branch", base="main", body="Test body", - 
draft=False + draft=False, ) - + assert result["result"] == "Pull request #123 created successfully and linked to issue #456" - + # Verify the API calls mock_get.assert_called_once() mock_post.assert_called_once() - @patch("requests.get") @patch("requests.post") def test_run_error_handling(self, mock_post, mock_get): # Mock an error response mock_get.side_effect = Exception("API Error") - + token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) - + result = pr_creator.run( - issue_url="https://github.com/owner/repo/issues/456", - title="Test PR", - branch="feature-branch", - base="main" + issue_url="https://github.com/owner/repo/issues/456", title="Test PR", branch="feature-branch", base="main" ) - + assert "Error" in result["result"] - + # Test with raise_on_failure=True pr_creator = GithubPRCreator(github_token=token, raise_on_failure=True) with pytest.raises(Exception): @@ -111,5 +101,5 @@ def test_run_error_handling(self, mock_post, mock_get): issue_url="https://github.com/owner/repo/issues/456", title="Test PR", branch="feature-branch", - base="main" + base="main", ) diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index db3fe57439..8bf3db4f3f 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -9,6 +9,7 @@ from haystack_integrations.components.connectors.github.repo_viewer import GithubRepositoryViewer + class TestGithubRepositoryViewer: def test_init_default(self): viewer = GithubRepositoryViewer() @@ -21,11 +22,7 @@ def test_init_default(self): def test_init_with_parameters(self): token = Secret.from_token("test_token") viewer = GithubRepositoryViewer( - github_token=token, - raise_on_failure=False, - max_file_size=500_000, - repo="owner/repo", - branch="main" + github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" ) assert 
viewer.github_token == token assert viewer.raise_on_failure is False @@ -37,40 +34,30 @@ def test_init_with_parameters(self): with pytest.raises(TypeError): GithubRepositoryViewer(github_token="not_a_secret") - def test_to_dict(self): token = Secret.from_token("test_token") - viewer = GithubRepositoryViewer( - github_token=token, - raise_on_failure=False, - max_file_size=500_000 - ) - + viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=False, max_file_size=500_000) + result = viewer.to_dict() - + assert result["github_token"]["type"] == "haystack.utils.Secret" assert result["raise_on_failure"] is False assert result["max_file_size"] == 500_000 - def test_from_dict(self): data = { - "github_token": { - "type": "haystack.utils.Secret", - "token": "test_token" - }, + "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, "raise_on_failure": False, - "max_file_size": 500_000 + "max_file_size": 500_000, } - + viewer = GithubRepositoryViewer.from_dict(data) - + assert isinstance(viewer.github_token, Secret) assert viewer.github_token.resolve_value() == "test_token" assert viewer.raise_on_failure is False assert viewer.max_file_size == 500_000 - @patch("requests.get") def test_run_file(self, mock_get): # Mock the file response @@ -80,108 +67,92 @@ def test_run_file(self, mock_get): "size": 100, "html_url": "https://github.com/owner/repo/blob/main/README.md", "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" - "encoding": "base64" + "encoding": "base64", } mock_get.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") viewer = GithubRepositoryViewer(github_token=token) - - result = viewer.run( - repo="owner/repo", - path="README.md", - branch="main" - ) - + + result = viewer.run(repo="owner/repo", path="README.md", branch="main") + assert len(result["documents"]) == 1 assert result["documents"][0].content == "Hello World" assert result["documents"][0].meta["type"] == "file_content" 
assert result["documents"][0].meta["path"] == "README.md" - + # Verify the API call mock_get.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/README.md?ref=main", - headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepositoryViewer", "Authorization": "Bearer test_token"} + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubRepositoryViewer", + "Authorization": "Bearer test_token", + }, ) - @patch("requests.get") def test_run_directory(self, mock_get): # Mock the directory response mock_get.return_value.json.return_value = [ - { - "name": "docs", - "path": "docs", - "type": "dir", - "html_url": "https://github.com/owner/repo/tree/main/docs" - }, + {"name": "docs", "path": "docs", "type": "dir", "html_url": "https://github.com/owner/repo/tree/main/docs"}, { "name": "README.md", "path": "README.md", "type": "file", "size": 100, - "html_url": "https://github.com/owner/repo/blob/main/README.md" - } + "html_url": "https://github.com/owner/repo/blob/main/README.md", + }, ] mock_get.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") viewer = GithubRepositoryViewer(github_token=token) - - result = viewer.run( - repo="owner/repo", - path="", - branch="main" - ) - + + result = viewer.run(repo="owner/repo", path="", branch="main") + assert len(result["documents"]) == 2 assert result["documents"][0].content == "docs" assert result["documents"][0].meta["type"] == "dir" assert result["documents"][1].content == "README.md" assert result["documents"][1].meta["type"] == "file" - + # Verify the API call mock_get.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/?ref=main", - headers={"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepositoryViewer", "Authorization": "Bearer test_token"} + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubRepositoryViewer", + 
"Authorization": "Bearer test_token", + }, ) - @patch("requests.get") def test_run_error_handling(self, mock_get): # Mock an error response mock_get.side_effect = Exception("API Error") - + token = Secret.from_token("test_token") viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=False) - - result = viewer.run( - repo="owner/repo", - path="README.md", - branch="main" - ) - + + result = viewer.run(repo="owner/repo", path="README.md", branch="main") + assert len(result["documents"]) == 1 assert result["documents"][0].meta["type"] == "error" - + # Test with raise_on_failure=True viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=True) with pytest.raises(Exception): - viewer.run( - repo="owner/repo", - path="README.md", - branch="main" - ) - + viewer.run(repo="owner/repo", path="README.md", branch="main") def test_parse_repo(self): token = Secret.from_token("test_token") viewer = GithubRepositoryViewer(github_token=token) - + owner, repo = viewer._parse_repo("owner/repo") assert owner == "owner" assert repo == "repo" - + # Test with invalid format with pytest.raises(ValueError): viewer._parse_repo("invalid_format") diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py index 62c3772e36..294097aa41 100644 --- a/integrations/github/tests/test_repository_forker.py +++ b/integrations/github/tests/test_repository_forker.py @@ -8,6 +8,7 @@ from haystack_integrations.components.connectors.github.repository_forker import GithubRepoForker + class TestGithubRepoForker: def test_init_default(self): forker = GithubRepoForker() @@ -28,7 +29,7 @@ def test_init_with_parameters(self): max_wait_seconds=60, poll_interval=1, auto_sync=False, - create_branch=False + create_branch=False, ) assert forker.github_token == token assert forker.raise_on_failure is False @@ -42,7 +43,6 @@ def test_init_with_parameters(self): with pytest.raises(TypeError): 
GithubRepoForker(github_token="not_a_secret") - def test_to_dict(self): token = Secret.from_token("test_token") forker = GithubRepoForker( @@ -52,11 +52,11 @@ def test_to_dict(self): max_wait_seconds=60, poll_interval=1, auto_sync=False, - create_branch=False + create_branch=False, ) - + result = forker.to_dict() - + assert result["type"] == "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker" assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" assert result["init_parameters"]["raise_on_failure"] is False @@ -66,26 +66,22 @@ def test_to_dict(self): assert result["init_parameters"]["auto_sync"] is False assert result["init_parameters"]["create_branch"] is False - def test_from_dict(self): data = { "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", "init_parameters": { - "github_token": { - "type": "haystack.utils.Secret", - "token": "test_token" - }, + "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, "raise_on_failure": False, "wait_for_completion": True, "max_wait_seconds": 60, "poll_interval": 1, "auto_sync": False, - "create_branch": False - } + "create_branch": False, + }, } - + forker = GithubRepoForker.from_dict(data) - + assert isinstance(forker.github_token, Secret) assert forker.github_token.resolve_value() == "test_token" assert forker.raise_on_failure is False @@ -95,103 +91,83 @@ def test_from_dict(self): assert forker.auto_sync is False assert forker.create_branch is False - @patch("requests.get") @patch("requests.post") def test_run_create_fork(self, mock_post, mock_get): # Mock the authenticated user response mock_get.return_value.json.return_value = {"login": "test_user"} mock_get.return_value.raise_for_status.return_value = None - + # Mock the fork creation response - mock_post.return_value.json.return_value = { - "owner": {"login": "test_user"}, - "name": "repo" - } + mock_post.return_value.json.return_value = 
{"owner": {"login": "test_user"}, "name": "repo"} mock_post.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") - forker = GithubRepoForker( - github_token=token, - create_branch=True, - auto_sync=False - ) - + forker = GithubRepoForker(github_token=token, create_branch=True, auto_sync=False) + result = forker.run(url="https://github.com/owner/repo/issues/123") - + assert result["repo"] == "test_user/repo" assert result["issue_branch"] == "fix-123" - + # Verify the API calls mock_get.assert_called_once() assert mock_post.call_count == 2 # One for fork creation, one for branch creation - @patch("requests.get") @patch("requests.post") def test_run_sync_existing_fork(self, mock_post, mock_get): # Mock the authenticated user response mock_get.side_effect = [ - type('Response', (), { - 'json': lambda: {"login": "test_user"}, - 'raise_for_status': lambda: None - }), - type('Response', (), { - 'status_code': 200, - 'json': lambda: {"name": "repo"}, - 'raise_for_status': lambda: None - }) + type("Response", (), {"json": lambda: {"login": "test_user"}, "raise_for_status": lambda: None}), + type( + "Response", (), {"status_code": 200, "json": lambda: {"name": "repo"}, "raise_for_status": lambda: None} + ), ] - + # Mock the sync response mock_post.return_value.raise_for_status.return_value = None - + token = Secret.from_token("test_token") - forker = GithubRepoForker( - github_token=token, - create_branch=True, - auto_sync=True - ) - + forker = GithubRepoForker(github_token=token, create_branch=True, auto_sync=True) + result = forker.run(url="https://github.com/owner/repo/issues/123") - + assert result["repo"] == "test_user/repo" assert result["issue_branch"] == "fix-123" - + # Verify the API calls assert mock_get.call_count == 2 assert mock_post.call_count == 2 # One for sync, one for branch creation - @patch("requests.get") @patch("requests.post") def test_run_error_handling(self, mock_post, mock_get): # Mock an error response 
mock_get.side_effect = Exception("API Error") - + token = Secret.from_token("test_token") forker = GithubRepoForker(github_token=token, raise_on_failure=False) - + result = forker.run(url="https://github.com/owner/repo/issues/123") - + assert result["repo"] == "" assert result["issue_branch"] is None - + # Test with raise_on_failure=True forker = GithubRepoForker(github_token=token, raise_on_failure=True) with pytest.raises(Exception): forker.run(url="https://github.com/owner/repo/issues/123") - def test_parse_github_url(self): token = Secret.from_token("test_token") forker = GithubRepoForker(github_token=token) - + owner, repo, issue_number = forker._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" assert repo == "repo" assert issue_number == "123" - + # Test with invalid URL with pytest.raises(ValueError): forker._parse_github_url("https://github.com/invalid/url") From 79dc7db5041a92fc3593d96e01116fe0cde5a3b2 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 10:02:11 +0200 Subject: [PATCH 08/51] fmt --- .../components/connectors/issue_commenter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py index d6b767afd1..4fec26dd77 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py @@ -103,7 +103,7 @@ def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) except requests.exceptions.RequestException as e: if attempt == self.retry_attempts - 1: raise - logger.warning(f"Attempt {attempt + 1} failed: {e!s}. Retrying...") + logger.warning(f"Attempt {attempt + 1} failed: str(e). 
Retrying...") return False @@ -150,6 +150,6 @@ def run(self, url: str, comment: str) -> dict: if self.raise_on_failure: raise - error_message = f"Error posting comment to GitHub issue {url}: {e!s}" + error_message = f"Error posting comment to GitHub issue {url}: str(e)" logger.warning(error_message) return {"success": False} From cf3a2f596b159e5eb4543e2476331c7e33da4535 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 10:11:28 +0200 Subject: [PATCH 09/51] lint --- integrations/github/tests/test_file_editor.py | 5 +++-- integrations/github/tests/test_issue_viewer.py | 7 +++---- integrations/github/tests/test_pr_creator.py | 2 +- integrations/github/tests/test_repo_viewer.py | 1 - integrations/github/tests/test_repository_forker.py | 5 +++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py index 16a1b5bf6a..d3541edd74 100644 --- a/integrations/github/tests/test_file_editor.py +++ b/integrations/github/tests/test_file_editor.py @@ -5,6 +5,7 @@ import pytest from haystack.utils import Secret +import requests from haystack_integrations.components.connectors.github.file_editor import Command, GithubFileEditor @@ -220,7 +221,7 @@ def test_run_delete(self, mock_delete, mock_get): @patch("requests.get") def test_run_error_handling(self, mock_get): # Mock an error response - mock_get.side_effect = Exception("API Error") + mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") editor = GithubFileEditor(github_token=token, raise_on_failure=False) @@ -236,7 +237,7 @@ def test_run_error_handling(self, mock_get): # Test with raise_on_failure=True editor = GithubFileEditor(github_token=token, raise_on_failure=True) - with pytest.raises(Exception): + with pytest.raises(requests.RequestException): editor.run( command=Command.EDIT, payload={"path": "test.txt", "original": "Hello", "replacement": "Hi", "message": 
"Update greeting"}, diff --git a/integrations/github/tests/test_issue_viewer.py b/integrations/github/tests/test_issue_viewer.py index 3879cbb6dc..8a1d9c849b 100644 --- a/integrations/github/tests/test_issue_viewer.py +++ b/integrations/github/tests/test_issue_viewer.py @@ -4,7 +4,6 @@ from unittest.mock import patch import pytest -from haystack import Document from haystack.utils import Secret from haystack_integrations.components.connectors.github.issue_viewer import GithubIssueViewer @@ -35,7 +34,7 @@ def test_to_dict(self): assert result["init_parameters"]["raise_on_failure"] is False assert result["init_parameters"]["retry_attempts"] == 3 - def test_from_dict(): + def test_from_dict(self): data = { "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", "init_parameters": { @@ -53,7 +52,7 @@ def test_from_dict(): assert viewer.retry_attempts == 3 @patch("requests.get") - def test_run(mock_get): + def test_run(self, mock_get): """Test the run method.""" # Mock the issue response mock_get.return_value.json.return_value = { @@ -112,7 +111,7 @@ def test_run(mock_get): assert mock_get.call_count == 2 @patch("requests.get") - def test_run_error_handling(mock_get): + def test_run_error_handling(self, mock_get): # Mock an error response mock_get.side_effect = Exception("API Error") diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py index 720f97abc6..66e4a29e9f 100644 --- a/integrations/github/tests/test_pr_creator.py +++ b/integrations/github/tests/test_pr_creator.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import Mock, patch +from unittest.mock import patch import pytest from haystack.utils import Secret diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index 8bf3db4f3f..d0e579bdae 100644 --- 
a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -4,7 +4,6 @@ from unittest.mock import patch import pytest -from haystack import Document from haystack.utils import Secret from haystack_integrations.components.connectors.github.repo_viewer import GithubRepositoryViewer diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py index 294097aa41..d4b3d20511 100644 --- a/integrations/github/tests/test_repository_forker.py +++ b/integrations/github/tests/test_repository_forker.py @@ -5,6 +5,7 @@ import pytest from haystack.utils import Secret +import requests from haystack_integrations.components.connectors.github.repository_forker import GithubRepoForker @@ -144,7 +145,7 @@ def test_run_sync_existing_fork(self, mock_post, mock_get): @patch("requests.post") def test_run_error_handling(self, mock_post, mock_get): # Mock an error response - mock_get.side_effect = Exception("API Error") + mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") forker = GithubRepoForker(github_token=token, raise_on_failure=False) @@ -156,7 +157,7 @@ def test_run_error_handling(self, mock_post, mock_get): # Test with raise_on_failure=True forker = GithubRepoForker(github_token=token, raise_on_failure=True) - with pytest.raises(Exception): + with pytest.raises(requests.RequestException): forker.run(url="https://github.com/owner/repo/issues/123") def test_parse_github_url(self): From f82412fbb03035f810bb8185b1e174b6b04f3f20 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 16:02:12 +0200 Subject: [PATCH 10/51] ruff --- integrations/github/pyproject.toml | 3 + .../components/connectors/file_editor.py | 24 +-- .../components/connectors/issue_commenter.py | 13 +- .../components/connectors/issue_viewer.py | 7 +- .../components/connectors/pr_creator.py | 150 +++++++++++++++++- .../components/connectors/repo_viewer.py | 13 
+- .../connectors/repository_forker.py | 34 ++-- .../components/prompts/__init__.py | 2 +- integrations/github/tests/test_file_editor.py | 2 +- .../github/tests/test_issue_commenter.py | 5 +- .../github/tests/test_issue_viewer.py | 5 +- integrations/github/tests/test_pr_creator.py | 7 +- integrations/github/tests/test_repo_viewer.py | 5 +- .../github/tests/test_repository_forker.py | 4 +- 14 files changed, 221 insertions(+), 53 deletions(-) diff --git a/integrations/github/pyproject.toml b/integrations/github/pyproject.toml index 6a29ffa2ab..d3a8d0fce5 100644 --- a/integrations/github/pyproject.toml +++ b/integrations/github/pyproject.toml @@ -59,6 +59,7 @@ cov-report = ["- coverage combine", "coverage report"] cov = ["test-cov", "cov-report"] cov-retry = ["test-cov-retry", "cov-report"] docs = ["pydoc-markdown pydoc/config.yml"] +fix = "ruff check --fix" [tool.hatch.envs.lint] installer = "uv" @@ -140,6 +141,8 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports "tests/**/*" = ["PLR2004", "S101", "TID252"] +# Ignore RUF001 for all files in the prompts directory +"src/haystack_integrations/components/prompts/**/*" = ["RUF001"] [tool.coverage.run] source = ["haystack_integrations"] diff --git a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py index 12cd41c579..0cd67a92f0 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py @@ -88,7 +88,8 @@ def __init__( :param raise_on_failure: If True, raises exceptions on API errors """ if not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") + error_message = "github_token must be a Secret" + raise TypeError(error_message) self.github_token = github_token 
self.default_repo = repo @@ -104,7 +105,7 @@ def __init__( def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]: """Get file content and SHA from GitHub.""" url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" - response = requests.get(url, headers=self.headers, params={"ref": branch}) + response = requests.get(url, headers=self.headers, params={"ref": branch}, timeout=10) response.raise_for_status() data = response.json() content = b64decode(data["content"]).decode("utf-8") @@ -119,20 +120,20 @@ def _update_file(self, owner: str, repo: str, path: str, content: str, message: "sha": sha, "branch": branch, } - response = requests.put(url, headers=self.headers, json=payload) + response = requests.put(url, headers=self.headers, json=payload, timeout=10) response.raise_for_status() return True def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool: """Check if last commit was made by the current token user.""" url = f"https://api.github.com/repos/{owner}/{repo}/commits" - response = requests.get(url, headers=self.headers, params={"per_page": 1, "sha": branch}) + response = requests.get(url, headers=self.headers, params={"per_page": 1, "sha": branch}, timeout=10) response.raise_for_status() last_commit = response.json()[0] commit_author = last_commit["author"]["login"] # Get current user - user_response = requests.get("https://api.github.com/user", headers=self.headers) + user_response = requests.get("https://api.github.com/user", headers=self.headers, timeout=10) user_response.raise_for_status() current_user = user_response.json()["login"] @@ -171,12 +172,17 @@ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits" # Get the previous commit SHA - commits = requests.get(commits_url, headers=self.headers, params={"per_page": 2, "sha": branch}).json() + commits = requests.get( + commits_url, + 
headers=self.headers, + params={"per_page": 2, "sha": branch}, + timeout=10 + ).json() previous_sha = commits[1]["sha"] # Update branch reference to previous commit payload = {"sha": previous_sha, "force": True} - response = requests.patch(url, headers=self.headers, json=payload) + response = requests.patch(url, headers=self.headers, json=payload, timeout=10) response.raise_for_status() return "Successfully undid last change" @@ -194,7 +200,7 @@ def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s data = {"message": payload["message"], "content": content, "branch": branch} - response = requests.put(url, headers=self.headers, json=data) + response = requests.put(url, headers=self.headers, json=data, timeout=10) response.raise_for_status() return "File created successfully" @@ -211,7 +217,7 @@ def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s data = {"message": payload["message"], "sha": sha, "branch": branch} - response = requests.delete(url, headers=self.headers, json=data) + response = requests.delete(url, headers=self.headers, json=data, timeout=10) response.raise_for_status() return "File deleted successfully" diff --git a/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py index 4fec26dd77..558c13f9b0 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py @@ -1,5 +1,5 @@ import re -from typing import Any, Dict, Optional +from typing import Any, Dict import requests from haystack import component, default_from_dict, default_to_dict, logging @@ -76,7 +76,8 @@ def _parse_github_url(self, url: str) -> tuple[str, str, int]: pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" match = re.match(pattern, url) if not match: - raise 
ValueError(f"Invalid GitHub issue URL format: {url}") + msg = f"Invalid GitHub issue URL format: {url}" + raise ValueError(msg) owner, repo, issue_number = match.groups() return owner, repo, int(issue_number) @@ -97,13 +98,13 @@ def _post_comment(self, owner: str, repo: str, issue_number: int, comment: str) for attempt in range(self.retry_attempts): try: - response = requests.post(url, headers=self._get_request_headers(), json=data) + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() return True except requests.exceptions.RequestException as e: if attempt == self.retry_attempts - 1: raise - logger.warning(f"Attempt {attempt + 1} failed: str(e). Retrying...") + logger.warning(f"Attempt {attempt + 1} failed: {e!s}. Retrying...") return False @@ -146,10 +147,10 @@ def run(self, url: str, comment: str) -> dict: success = self._post_comment(owner, repo, issue_number, comment) return {"success": success} - except Exception as e: + except (requests.exceptions.RequestException, ValueError) as e: if self.raise_on_failure: raise - error_message = f"Error posting comment to GitHub issue {url}: str(e)" + error_message = f"Error posting comment to GitHub issue {url}: {e!s}" logger.warning(error_message) return {"success": False} diff --git a/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py index 224c30fa30..c61d5b0d95 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py @@ -77,7 +77,8 @@ def _parse_github_url(self, url: str) -> tuple[str, str, int]: pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" match = re.match(pattern, url) if not match: - raise ValueError(f"Invalid GitHub issue URL format: {url}") + msg = f"Invalid GitHub issue URL format: 
{url}" + raise ValueError(msg) owner, repo, issue_number = match.groups() return owner, repo, int(issue_number) @@ -92,7 +93,7 @@ def _fetch_issue(self, owner: str, repo: str, issue_number: int) -> Any: :return: Issue data dictionary """ url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}" - response = requests.get(url, headers=self._get_request_headers()) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() return response.json() @@ -103,7 +104,7 @@ def _fetch_comments(self, comments_url: str) -> Any: :param comments_url: URL for issue comments :return: List of comment dictionaries """ - response = requests.get(comments_url, headers=self._get_request_headers()) + response = requests.get(comments_url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() return response.json() diff --git a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py index ecf30448df..0df7445761 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py @@ -43,7 +43,8 @@ def __init__(self, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), r :param raise_on_failure: If True, raises exceptions on API errors """ if not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") + msg = "github_token must be a Secret" + raise TypeError(msg) self.github_token = github_token self.raise_on_failure = raise_on_failure @@ -71,26 +72,161 @@ def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: pattern = r"https://github\.com/([^/]+)/([^/]+)/issues/(\d+)" match = re.match(pattern, issue_url) if not match: - raise ValueError("Invalid GitHub issue URL format") + msg = "Invalid GitHub issue URL format" + raise ValueError(msg) 
return match.group(1), match.group(2), match.group(3) def _get_authenticated_user(self) -> str: """Get the username of the authenticated user (fork owner).""" - response = requests.get("https://api.github.com/user", headers=self._get_headers()) + response = requests.get("https://api.github.com/user", headers=self._get_headers(), timeout=10) response.raise_for_status() return response.json()["login"] - def _check_fork_exists(self, owner: str, repo: str, fork_owner: str) -> bool: + def _check_fork_exists(self, repo: str, fork_owner: str) -> bool: """Check if the fork exists.""" url = f"https://api.github.com/repos/{fork_owner}/{repo}" try: - response = requests.get(url, headers=self._get_headers()) + response = requests.get(url, headers=self._get_headers(), timeout=10) response.raise_for_status() fork_data = response.json() return fork_data.get("fork", False) except requests.RequestException: return False + def _create_fork(self, owner: str, repo: str) -> str: + """Create a fork of the repository.""" + url = f"https://api.github.com/repos/{owner}/{repo}/forks" + try: + response = requests.post(url, headers=self._get_headers(), timeout=10) + response.raise_for_status() + fork_data = response.json() + return fork_data["owner"]["login"] + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create fork: {e!s}" + raise RuntimeError(msg) from e + return None + + def _create_branch(self, owner: str, repo: str, branch_name: str, base_branch: str) -> bool: + """Create a new branch in the repository.""" + # Get the SHA of the base branch + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{base_branch}" + try: + response = requests.get(url, headers=self._get_headers(), timeout=10) + response.raise_for_status() + base_sha = response.json()["object"]["sha"] + + # Create the new branch + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs" + data = { + "ref": f"refs/heads/{branch_name}", + "sha": base_sha + } + response 
= requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response.raise_for_status() + return True + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create branch: {e!s}" + raise RuntimeError(msg) from e + return False + + def _create_commit( + self, + owner: str, + repo: str, + branch_name: str, + file_path: str, + content: str, + message: str, + ) -> bool: + """Create a commit with the file changes.""" + # Get the current commit SHA + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}" + try: + response = requests.get(url, headers=self._get_headers(), timeout=10) + response.raise_for_status() + current_sha = response.json()["object"]["sha"] + + # Create a blob with the file content + url = f"https://api.github.com/repos/{owner}/{repo}/git/blobs" + data = { + "content": content, + "encoding": "base64" + } + response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response.raise_for_status() + blob_sha = response.json()["sha"] + + # Create a tree with the new file + url = f"https://api.github.com/repos/{owner}/{repo}/git/trees" + data = { + "base_tree": current_sha, + "tree": [ + { + "path": file_path, + "mode": "100644", + "type": "blob", + "sha": blob_sha + } + ] + } + response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response.raise_for_status() + tree_sha = response.json()["sha"] + + # Create the commit + url = f"https://api.github.com/repos/{owner}/{repo}/git/commits" + data = { + "message": message, + "tree": tree_sha, + "parents": [current_sha] + } + response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response.raise_for_status() + commit_sha = response.json()["sha"] + + # Update the branch reference + url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}" + data = { + "sha": commit_sha + } + response = requests.patch(url, headers=self._get_headers(), 
json=data, timeout=10) + response.raise_for_status() + return True + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create commit: {e!s}" + raise RuntimeError(msg) from e + return False + + def _create_pull_request( + self, + owner: str, + repo: str, + branch_name: str, + base_branch: str, + title: str, + body: str, + ) -> bool: + """Create a pull request.""" + url = f"https://api.github.com/repos/{owner}/{repo}/pulls" + data = { + "title": title, + "body": body, + "head": branch_name, + "base": base_branch + } + try: + response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response.raise_for_status() + return True + except requests.RequestException as e: + if self.raise_on_failure: + msg = f"Failed to create pull request: {e!s}" + raise RuntimeError(msg) from e + return False + @component.output_types(result=str) def run( self, issue_url: str, title: str, branch: str, base: str, body: str = "", draft: bool = False @@ -114,7 +250,7 @@ def run( fork_owner = self._get_authenticated_user() # Check if the fork exists - if not self._check_fork_exists(owner, repo_name, fork_owner): + if not self._check_fork_exists(repo_name, fork_owner): return {"result": f"Error: Fork not found at {fork_owner}/{repo_name}"} url = f"https://api.github.com/repos/{owner}/{repo_name}/pulls" @@ -131,7 +267,7 @@ def run( "maintainer_can_modify": True, # Allow maintainers to modify the PR } - response = requests.post(url, headers=self._get_headers(), json=pr_data) + response = requests.post(url, headers=self._get_headers(), json=pr_data, timeout=10) response.raise_for_status() pr_number = response.json()["number"] diff --git a/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py index 000bd41caa..b7f65b8433 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py +++ 
b/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py @@ -84,7 +84,8 @@ def __init__( :param max_file_size: Maximum file size in bytes to fetch (default: 1MB) """ if github_token is not None and not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") + msg = "github_token must be a Secret" + raise TypeError(msg) self.github_token = github_token self.raise_on_failure = raise_on_failure @@ -125,8 +126,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "GithubRepositoryViewer": def _parse_repo(self, repo: str) -> tuple[str, str]: """Parse owner/repo string""" parts = repo.split("/") - if len(parts) != 2: - raise ValueError(f"Invalid repository format. Expected 'owner/repo', got '{repo}'") + if len(parts) != 2: # noqa: PLR2004 + msg = f"Invalid repository format. Expected 'owner/repo', got '{repo}'" + raise ValueError(msg) return parts[0], parts[1] def _normalize_path(self, path: str) -> str: @@ -143,7 +145,7 @@ def _fetch_contents(self, owner: str, repo: str, path: str, ref: str) -> Any: if self.github_token: headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" - response = requests.get(url, headers=headers) + response = requests.get(url, headers=headers, timeout=10) response.raise_for_status() return response.json() @@ -214,7 +216,8 @@ def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = Non # Handle single file response if not isinstance(contents, list): if contents.get("size", 0) > self.max_file_size: - raise ValueError(f"File size {contents['size']} exceeds limit of {self.max_file_size}") + error_message = f"File size {contents['size']} exceeds limit of {self.max_file_size}" + raise ValueError(error_message) item = GitHubItem( name=contents["name"], diff --git a/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py index 
3759785fb0..89e1f764d4 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py @@ -54,8 +54,9 @@ def __init__( :param auto_sync: If True, syncs fork with original repository if it already exists :param create_branch: If True, creates a fix branch based on the issue number """ + error_message = "github_token must be a Secret" if not isinstance(github_token, Secret): - raise TypeError("github_token must be a Secret") + raise TypeError(error_message) self.github_token = github_token self.raise_on_failure = raise_on_failure @@ -78,7 +79,8 @@ def _parse_github_url(self, url: str) -> tuple[str, str, str]: pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" match = re.match(pattern, url) if not match: - raise ValueError(f"Invalid GitHub issue URL format: {url}") + error_message = f"Invalid GitHub issue URL format: {url}" + raise ValueError(error_message) owner, repo, issue_number = match.groups() return owner, repo, issue_number @@ -93,9 +95,11 @@ def _check_fork_status(self, fork_path: str) -> bool: url = f"https://api.github.com/repos/{fork_path}" try: response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + timeout=10 ) - return response.status_code == 200 + return response.status_code == 200 # noqa: PLR2004 except requests.RequestException: return False @@ -108,7 +112,9 @@ def _get_authenticated_user(self) -> str: """ url = "https://api.github.com/user" response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + timeout=10 ) response.raise_for_status() return response.json()["login"] @@ -123,9 
+129,11 @@ def _get_existing_repository(self, repo_name: str) -> Optional[str]: url = f"https://api.github.com/repos/{self._get_authenticated_user()}/{repo_name}" try: response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + timeout=10 ) - if response.status_code == 200: + if response.status_code == 200: # noqa: PLR2004 return repo_name return None except requests.RequestException as e: @@ -144,6 +152,7 @@ def _sync_fork(self, fork_path: str) -> None: url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, json={"branch": "main"}, + timeout=10 ) response.raise_for_status() @@ -158,7 +167,9 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # First, get the default branch SHA url = f"https://api.github.com/repos/{fork_path}" response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + timeout=10 ) response.raise_for_status() default_branch = response.json()["default_branch"] @@ -166,7 +177,9 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # Get the SHA of the default branch url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, + headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + timeout=10 ) response.raise_for_status() sha = response.json()["object"]["sha"] @@ -178,6 +191,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, json={"ref": 
f"refs/heads/{branch_name}", "sha": sha}, + timeout=10 ) response.raise_for_status() @@ -192,7 +206,7 @@ def _create_fork(self, owner: str, repo: str) -> str: """ url = f"https://api.github.com/repos/{owner}/{repo}/forks" response = requests.post( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"} + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 ) response.raise_for_status() diff --git a/integrations/github/src/haystack_integrations/components/prompts/__init__.py b/integrations/github/src/haystack_integrations/components/prompts/__init__.py index 984d948c61..99f69f7326 100644 --- a/integrations/github/src/haystack_integrations/components/prompts/__init__.py +++ b/integrations/github/src/haystack_integrations/components/prompts/__init__.py @@ -5,4 +5,4 @@ from .repo_viewer_tool import repo_viewer_prompt, repo_viewer_schema from .system_prompt import issue_prompt -_all_ = ["issue_prompt", "repo_viewer_prompt", "repo_viewer_schema"] +__all__ = ["issue_prompt", "repo_viewer_prompt", "repo_viewer_schema"] diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py index d3541edd74..c0d5f466e8 100644 --- a/integrations/github/tests/test_file_editor.py +++ b/integrations/github/tests/test_file_editor.py @@ -4,8 +4,8 @@ from unittest.mock import patch import pytest -from haystack.utils import Secret import requests +from haystack.utils import Secret from haystack_integrations.components.connectors.github.file_editor import Command, GithubFileEditor diff --git a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py index db89fff8ae..be5d730d0f 100644 --- a/integrations/github/tests/test_issue_commenter.py +++ b/integrations/github/tests/test_issue_commenter.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest +import requests from haystack.utils import Secret from 
haystack_integrations.components.connectors.github.issue_commenter import GithubIssueCommenter @@ -80,7 +81,7 @@ def test_run(self, mock_post): @patch("requests.post") def test_run_error_handling(self, mock_post): # Mock an error response - mock_post.side_effect = Exception("API Error") + mock_post.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False) @@ -91,7 +92,7 @@ def test_run_error_handling(self, mock_post): # Test with raise_on_failure=True commenter = GithubIssueCommenter(github_token=token, raise_on_failure=True) - with pytest.raises(Exception): + with pytest.raises(requests.RequestException): commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") def test_parse_github_url(self): diff --git a/integrations/github/tests/test_issue_viewer.py b/integrations/github/tests/test_issue_viewer.py index 8a1d9c849b..f25a079728 100644 --- a/integrations/github/tests/test_issue_viewer.py +++ b/integrations/github/tests/test_issue_viewer.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest +import requests from haystack.utils import Secret from haystack_integrations.components.connectors.github.issue_viewer import GithubIssueViewer @@ -113,7 +114,7 @@ def test_run(self, mock_get): @patch("requests.get") def test_run_error_handling(self, mock_get): # Mock an error response - mock_get.side_effect = Exception("API Error") + mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") viewer = GithubIssueViewer(github_token=token, raise_on_failure=False) @@ -126,7 +127,7 @@ def test_run_error_handling(self, mock_get): # Test with raise_on_failure=True viewer = GithubIssueViewer(github_token=token, raise_on_failure=True) - with pytest.raises(Exception): + with pytest.raises(requests.RequestException): viewer.run(url="https://github.com/owner/repo/issues/123") def 
test_parse_github_url(self): diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py index 66e4a29e9f..10089ba7ff 100644 --- a/integrations/github/tests/test_pr_creator.py +++ b/integrations/github/tests/test_pr_creator.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest +import requests from haystack.utils import Secret from haystack_integrations.components.connectors.github.pr_creator import GithubPRCreator @@ -81,9 +82,9 @@ def test_run(self, mock_post, mock_get): @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, mock_post, mock_get): + def test_run_error_handling(self, mock_get): # Mock an error response - mock_get.side_effect = Exception("API Error") + mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) @@ -96,7 +97,7 @@ def test_run_error_handling(self, mock_post, mock_get): # Test with raise_on_failure=True pr_creator = GithubPRCreator(github_token=token, raise_on_failure=True) - with pytest.raises(Exception): + with pytest.raises(requests.RequestException): pr_creator.run( issue_url="https://github.com/owner/repo/issues/456", title="Test PR", diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index d0e579bdae..47511a193b 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest +import requests from haystack.utils import Secret from haystack_integrations.components.connectors.github.repo_viewer import GithubRepositoryViewer @@ -129,7 +130,7 @@ def test_run_directory(self, mock_get): @patch("requests.get") def test_run_error_handling(self, mock_get): # Mock an error response - mock_get.side_effect = Exception("API Error") + mock_get.side_effect = requests.RequestException("API 
Error") token = Secret.from_token("test_token") viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=False) @@ -141,7 +142,7 @@ def test_run_error_handling(self, mock_get): # Test with raise_on_failure=True viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=True) - with pytest.raises(Exception): + with pytest.raises(requests.RequestException): viewer.run(repo="owner/repo", path="README.md", branch="main") def test_parse_repo(self): diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py index d4b3d20511..5ff9e9a67d 100644 --- a/integrations/github/tests/test_repository_forker.py +++ b/integrations/github/tests/test_repository_forker.py @@ -4,8 +4,8 @@ from unittest.mock import patch import pytest -from haystack.utils import Secret import requests +from haystack.utils import Secret from haystack_integrations.components.connectors.github.repository_forker import GithubRepoForker @@ -143,7 +143,7 @@ def test_run_sync_existing_fork(self, mock_post, mock_get): @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, mock_post, mock_get): + def test_run_error_handling(self, mock_get): # Mock an error response mock_get.side_effect = requests.RequestException("API Error") From fa9cbe3b4d315b407502d8041183cf82ba141351 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 16:03:30 +0200 Subject: [PATCH 11/51] fmt --- .../components/connectors/file_editor.py | 5 +-- .../components/connectors/pr_creator.py | 36 ++++--------------- .../connectors/repository_forker.py | 20 ++++------- 3 files changed, 14 insertions(+), 47 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py index 0cd67a92f0..e3563d33da 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py +++ 
b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py @@ -173,10 +173,7 @@ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: # Get the previous commit SHA commits = requests.get( - commits_url, - headers=self.headers, - params={"per_page": 2, "sha": branch}, - timeout=10 + commits_url, headers=self.headers, params={"per_page": 2, "sha": branch}, timeout=10 ).json() previous_sha = commits[1]["sha"] diff --git a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py index 0df7445761..e803d9f666 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py @@ -118,10 +118,7 @@ def _create_branch(self, owner: str, repo: str, branch_name: str, base_branch: s # Create the new branch url = f"https://api.github.com/repos/{owner}/{repo}/git/refs" - data = { - "ref": f"refs/heads/{branch_name}", - "sha": base_sha - } + data = {"ref": f"refs/heads/{branch_name}", "sha": base_sha} response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) response.raise_for_status() return True @@ -150,10 +147,7 @@ def _create_commit( # Create a blob with the file content url = f"https://api.github.com/repos/{owner}/{repo}/git/blobs" - data = { - "content": content, - "encoding": "base64" - } + data = {"content": content, "encoding": "base64"} response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) response.raise_for_status() blob_sha = response.json()["sha"] @@ -162,14 +156,7 @@ def _create_commit( url = f"https://api.github.com/repos/{owner}/{repo}/git/trees" data = { "base_tree": current_sha, - "tree": [ - { - "path": file_path, - "mode": "100644", - "type": "blob", - "sha": blob_sha - } - ] + "tree": [{"path": file_path, "mode": "100644", "type": "blob", 
"sha": blob_sha}], } response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) response.raise_for_status() @@ -177,20 +164,14 @@ def _create_commit( # Create the commit url = f"https://api.github.com/repos/{owner}/{repo}/git/commits" - data = { - "message": message, - "tree": tree_sha, - "parents": [current_sha] - } + data = {"message": message, "tree": tree_sha, "parents": [current_sha]} response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) response.raise_for_status() commit_sha = response.json()["sha"] # Update the branch reference url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}" - data = { - "sha": commit_sha - } + data = {"sha": commit_sha} response = requests.patch(url, headers=self._get_headers(), json=data, timeout=10) response.raise_for_status() return True @@ -211,12 +192,7 @@ def _create_pull_request( ) -> bool: """Create a pull request.""" url = f"https://api.github.com/repos/{owner}/{repo}/pulls" - data = { - "title": title, - "body": body, - "head": branch_name, - "base": base_branch - } + data = {"title": title, "body": body, "head": branch_name, "base": base_branch} try: response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) response.raise_for_status() diff --git a/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py index 89e1f764d4..a15122f429 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py @@ -97,7 +97,7 @@ def _check_fork_status(self, fork_path: str) -> bool: response = requests.get( url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - timeout=10 + timeout=10, ) return response.status_code == 200 # noqa: PLR2004 except 
requests.RequestException: @@ -112,9 +112,7 @@ def _get_authenticated_user(self) -> str: """ url = "https://api.github.com/user" response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - timeout=10 + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 ) response.raise_for_status() return response.json()["login"] @@ -131,7 +129,7 @@ def _get_existing_repository(self, repo_name: str) -> Optional[str]: response = requests.get( url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - timeout=10 + timeout=10, ) if response.status_code == 200: # noqa: PLR2004 return repo_name @@ -152,7 +150,7 @@ def _sync_fork(self, fork_path: str) -> None: url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, json={"branch": "main"}, - timeout=10 + timeout=10, ) response.raise_for_status() @@ -167,9 +165,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # First, get the default branch SHA url = f"https://api.github.com/repos/{fork_path}" response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - timeout=10 + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 ) response.raise_for_status() default_branch = response.json()["default_branch"] @@ -177,9 +173,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # Get the SHA of the default branch url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" response = requests.get( - url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, - timeout=10 + url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 ) response.raise_for_status() sha = 
response.json()["object"]["sha"] @@ -191,7 +185,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, json={"ref": f"refs/heads/{branch_name}", "sha": sha}, - timeout=10 + timeout=10, ) response.raise_for_status() From c4beece2af3d82227f44952109ef3c18b622c876 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 16:24:32 +0200 Subject: [PATCH 12/51] lint:all --- .../components/connectors/file_editor.py | 16 +++++++++++----- .../components/connectors/pr_creator.py | 6 +++--- .../components/connectors/repo_viewer.py | 10 +++++++++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py index e3563d33da..500d63690a 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py @@ -127,7 +127,8 @@ def _update_file(self, owner: str, repo: str, path: str, content: str, message: def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool: """Check if last commit was made by the current token user.""" url = f"https://api.github.com/repos/{owner}/{repo}/commits" - response = requests.get(url, headers=self.headers, params={"per_page": 1, "sha": branch}, timeout=10) + params: Dict[str, Union[str, int]] = {"per_page": 1, "sha": branch} + response = requests.get(url, headers=self.headers, params=params, timeout=10) response.raise_for_status() last_commit = response.json()[0] commit_author = last_commit["author"]["login"] @@ -161,7 +162,7 @@ def _edit_file(self, owner: str, repo: str, payload: Dict[str, str], branch: str raise return f"Error: {e!s}" - def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: str) -> str: 
+ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, Any], branch: str) -> str: """Handle undoing changes.""" try: if not self._check_last_commit(owner, repo, branch): @@ -172,9 +173,8 @@ def _undo_changes(self, owner: str, repo: str, payload: Dict[str, str], branch: commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits" # Get the previous commit SHA - commits = requests.get( - commits_url, headers=self.headers, params={"per_page": 2, "sha": branch}, timeout=10 - ).json() + params: Dict[str, Union[str, int]] = {"per_page": 2, "sha": branch} + commits = requests.get(commits_url, headers=self.headers, params=params, timeout=10).json() previous_sha = commits[1]["sha"] # Update branch reference to previous commit @@ -239,6 +239,8 @@ def run( :param repo: Repository in owner/repo format (overrides default if provided) :param branch: Branch to perform operations on (overrides default if provided) :return: Dictionary containing operation result + + :raises ValueError: If command is not a valid Command enum value """ if repo is None: if self.default_repo is None: @@ -250,6 +252,10 @@ def run( working_branch = branch if branch is not None else self.default_branch owner, repo_name = repo.split("/") + # Convert string command to Command enum if needed + if isinstance(command, str): + command = Command(command.lower()) + command_handlers = { Command.EDIT: self._edit_file, Command.UNDO: self._undo_changes, diff --git a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py index e803d9f666..b73f7db32d 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py @@ -1,5 +1,5 @@ import re -from typing import Any, Dict +from typing import Any, Dict, Optional import requests from haystack import component, default_from_dict, 
default_to_dict, logging @@ -93,7 +93,7 @@ def _check_fork_exists(self, repo: str, fork_owner: str) -> bool: except requests.RequestException: return False - def _create_fork(self, owner: str, repo: str) -> str: + def _create_fork(self, owner: str, repo: str) -> Optional[str]: """Create a fork of the repository.""" url = f"https://api.github.com/repos/{owner}/{repo}/forks" try: @@ -147,7 +147,7 @@ def _create_commit( # Create a blob with the file content url = f"https://api.github.com/repos/{owner}/{repo}/git/blobs" - data = {"content": content, "encoding": "base64"} + data: dict[str, Any] = {"content": content, "encoding": "base64"} response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) response.raise_for_status() blob_sha = response.json()["sha"] diff --git a/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py index b7f65b8433..257bda7580 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py @@ -82,6 +82,8 @@ def __init__( :param github_token: GitHub personal access token for API authentication :param raise_on_failure: If True, raises exceptions on API errors :param max_file_size: Maximum file size in bytes to fetch (default: 1MB) + :param repo: Repository in format "owner/repo" + :param branch: Git reference (branch, tag, commit) to use """ if github_token is not None and not isinstance(github_token, Secret): msg = "github_token must be a Secret" @@ -204,8 +206,14 @@ def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = Non """ if repo is None: repo = self.repo + if repo is None: + msg = "Repository not provided in initialization or run() method" + raise ValueError(msg) if branch is None: branch = self.branch + if branch is None: + msg = "Branch not provided in 
initialization or run() method" + raise ValueError(msg) try: owner, repo_name = self._parse_repo(repo) @@ -245,7 +253,7 @@ def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = Non except Exception as e: error_doc = self._create_error_document( - f"Error processing repository path {path}: {e!s}. Seems like the file does not exist.", path + Exception(f"Error processing repository path {path}: {e!s}. Seems like the file does not exist."), path ) if self.raise_on_failure: raise From 4a6b81ba461493a1c83eab7c72352660a8bd8119 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 16 Apr 2025 16:36:31 +0200 Subject: [PATCH 13/51] replace StrEnum for py 3.9+ compatibility --- .../components/connectors/file_editor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py index 500d63690a..40ad9829cf 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/file_editor.py @@ -1,5 +1,5 @@ from base64 import b64decode, b64encode -from enum import StrEnum +from enum import Enum from typing import Any, Dict, Optional, Union import requests @@ -9,7 +9,7 @@ logger = logging.getLogger(__name__) -class Command(StrEnum): +class Command(str, Enum): """ Available commands for file operations in GitHub. 
From e77a49e88a0dcb196b720ca012983c11deb9c763 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 24 Apr 2025 18:28:14 +0200 Subject: [PATCH 14/51] move files --- .../components/connectors/{ => github}/file_editor.py | 0 .../components/connectors/{ => github}/issue_commenter.py | 0 .../components/connectors/{ => github}/issue_viewer.py | 0 .../components/connectors/{ => github}/pr_creator.py | 0 .../components/connectors/{ => github}/repo_viewer.py | 0 .../components/connectors/{ => github}/repository_forker.py | 0 .../{components/prompts => prompts/github}/__init__.py | 0 .../{components/prompts => prompts/github}/comment_tool.py | 0 .../{components/prompts => prompts/github}/context.py | 0 .../{components/prompts => prompts/github}/file_editor_tool.py | 0 .../{components/prompts => prompts/github}/pr_system_prompt.py | 0 .../{components/prompts => prompts/github}/repo_viewer_tool.py | 0 .../{components/prompts => prompts/github}/system_prompt.py | 0 13 files changed, 0 insertions(+), 0 deletions(-) rename integrations/github/src/haystack_integrations/components/connectors/{ => github}/file_editor.py (100%) rename integrations/github/src/haystack_integrations/components/connectors/{ => github}/issue_commenter.py (100%) rename integrations/github/src/haystack_integrations/components/connectors/{ => github}/issue_viewer.py (100%) rename integrations/github/src/haystack_integrations/components/connectors/{ => github}/pr_creator.py (100%) rename integrations/github/src/haystack_integrations/components/connectors/{ => github}/repo_viewer.py (100%) rename integrations/github/src/haystack_integrations/components/connectors/{ => github}/repository_forker.py (100%) rename integrations/github/src/haystack_integrations/{components/prompts => prompts/github}/__init__.py (100%) rename integrations/github/src/haystack_integrations/{components/prompts => prompts/github}/comment_tool.py (100%) rename integrations/github/src/haystack_integrations/{components/prompts => 
prompts/github}/context.py (100%) rename integrations/github/src/haystack_integrations/{components/prompts => prompts/github}/file_editor_tool.py (100%) rename integrations/github/src/haystack_integrations/{components/prompts => prompts/github}/pr_system_prompt.py (100%) rename integrations/github/src/haystack_integrations/{components/prompts => prompts/github}/repo_viewer_tool.py (100%) rename integrations/github/src/haystack_integrations/{components/prompts => prompts/github}/system_prompt.py (100%) diff --git a/integrations/github/src/haystack_integrations/components/connectors/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/connectors/file_editor.py rename to integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py diff --git a/integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/connectors/issue_commenter.py rename to integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py diff --git a/integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/connectors/issue_viewer.py rename to integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py diff --git a/integrations/github/src/haystack_integrations/components/connectors/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py similarity index 100% rename from 
integrations/github/src/haystack_integrations/components/connectors/pr_creator.py rename to integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py diff --git a/integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/connectors/repo_viewer.py rename to integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py diff --git a/integrations/github/src/haystack_integrations/components/connectors/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/connectors/repository_forker.py rename to integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py diff --git a/integrations/github/src/haystack_integrations/components/prompts/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/prompts/__init__.py rename to integrations/github/src/haystack_integrations/prompts/github/__init__.py diff --git a/integrations/github/src/haystack_integrations/components/prompts/comment_tool.py b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/prompts/comment_tool.py rename to integrations/github/src/haystack_integrations/prompts/github/comment_tool.py diff --git a/integrations/github/src/haystack_integrations/components/prompts/context.py b/integrations/github/src/haystack_integrations/prompts/github/context.py similarity index 100% rename from 
integrations/github/src/haystack_integrations/components/prompts/context.py rename to integrations/github/src/haystack_integrations/prompts/github/context.py diff --git a/integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/prompts/file_editor_tool.py rename to integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py diff --git a/integrations/github/src/haystack_integrations/components/prompts/pr_system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/prompts/pr_system_prompt.py rename to integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py diff --git a/integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/prompts/repo_viewer_tool.py rename to integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py diff --git a/integrations/github/src/haystack_integrations/components/prompts/system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py similarity index 100% rename from integrations/github/src/haystack_integrations/components/prompts/system_prompt.py rename to integrations/github/src/haystack_integrations/prompts/github/system_prompt.py From 4e4908176342023bba26ee5684df672d7054da6a Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 24 Apr 2025 21:25:10 +0200 Subject: [PATCH 15/51] fix tests --- .../connectors/github/repo_viewer.py | 24 +-- integrations/github/tests/test_file_editor.py | 126 +++++++------ 
.../github/tests/test_issue_commenter.py | 56 +++--- .../github/tests/test_issue_viewer.py | 57 +++--- integrations/github/tests/test_pr_creator.py | 123 ++++++++----- integrations/github/tests/test_repo_viewer.py | 68 ++++--- .../github/tests/test_repository_forker.py | 169 +++++++++++++----- 7 files changed, 398 insertions(+), 225 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index 257bda7580..a7bb03c888 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -3,8 +3,8 @@ from typing import Any, Dict, List, Optional import requests -from haystack import Document, component, logging -from haystack.utils import Secret +from haystack import Document, component, default_from_dict, logging, default_to_dict +from haystack.utils import Secret, deserialize_secrets_inplace logger = logging.getLogger(__name__) @@ -106,11 +106,14 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. """ - return { - "github_token": self.github_token.to_dict() if self.github_token else None, - "raise_on_failure": self.raise_on_failure, - "max_file_size": self.max_file_size, - } + return default_to_dict( + self, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + max_file_size=self.max_file_size, + repo=self.repo, + branch=self.branch, + ) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GithubRepositoryViewer": @@ -120,10 +123,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "GithubRepositoryViewer": :param data: Dictionary to deserialize from. :returns: Deserialized component. 
""" - init_params = data.copy() - if init_params["github_token"]: - init_params["github_token"] = Secret.from_dict(init_params["github_token"]) - return cls(**init_params) + init_params = data["init_parameters"] + deserialize_secrets_inplace(init_params, keys=["github_token"]) + return default_from_dict(cls, data) def _parse_repo(self, repo: str) -> tuple[str, str]: """Parse owner/repo string""" diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py index c0d5f466e8..7c1605d0d0 100644 --- a/integrations/github/tests/test_file_editor.py +++ b/integrations/github/tests/test_file_editor.py @@ -11,9 +11,12 @@ class TestGithubFileEditor: - def test_init_default(self): + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + editor = GithubFileEditor() - assert editor.github_token is None + assert editor.github_token is not None + assert editor.github_token.resolve_value() == "test-token" assert editor.default_repo is None assert editor.default_branch == "main" assert editor.raise_on_failure is True @@ -26,33 +29,48 @@ def test_init_with_parameters(self): assert editor.default_branch == "feature" assert editor.raise_on_failure is False - # Test with invalid token type with pytest.raises(TypeError): GithubFileEditor(github_token="not_a_secret") - def test_to_dict(self): - token = Secret.from_token("test_token") - editor = GithubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) - - result = editor.to_dict() - - assert result["github_token"]["type"] == "haystack.utils.Secret" - assert result["repo"] == "owner/repo" - assert result["branch"] == "feature" - assert result["raise_on_failure"] is False + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + + token = Secret.from_env_var("ENV_VAR") + + editor = GithubFileEditor( + github_token=token, + repo="owner/repo", + branch="feature", + raise_on_failure=False + 
) + + data = editor.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.file_editor.GithubFileEditor", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "repo": "owner/repo", + "branch": "feature", + "raise_on_failure": False + } + } - def test_from_dict(self): + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") data = { - "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, - "repo": "owner/repo", - "branch": "feature", - "raise_on_failure": False, + "type": "haystack_integrations.components.connectors.github.file_editor.GithubFileEditor", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "repo": "owner/repo", + "branch": "feature", + "raise_on_failure": False + } } editor = GithubFileEditor.from_dict(data) - assert isinstance(editor.github_token, Secret) - assert editor.github_token.resolve_value() == "test_token" + assert editor.github_token == Secret.from_env_var("ENV_VAR") assert editor.default_repo == "owner/repo" assert editor.default_branch == "feature" assert editor.raise_on_failure is False @@ -60,14 +78,11 @@ def test_from_dict(self): @patch("requests.get") @patch("requests.put") def test_run_edit(self, mock_put, mock_get): - # Mock the file content response mock_get.return_value.json.return_value = { "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" "sha": "abc123", } mock_get.return_value.raise_for_status.return_value = None - - # Mock the update response mock_put.return_value.raise_for_status.return_value = None token = Secret.from_token("test_token") @@ -82,7 +97,6 @@ def test_run_edit(self, mock_put, mock_get): assert result["result"] == "Edit successful" - # Verify the API calls mock_get.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/test.txt", headers={ @@ -91,6 +105,7 @@ def test_run_edit(self, mock_put, 
mock_get): "Authorization": "Bearer test_token", }, params={"ref": "main"}, + timeout=10, ) mock_put.assert_called_once() @@ -103,23 +118,35 @@ def test_run_edit(self, mock_put, mock_get): @patch("requests.get") @patch("requests.patch") def test_run_undo(self, mock_patch, mock_get): - # Mock the user check response - mock_get.return_value.json.return_value = {"login": "testuser"} - mock_get.return_value.raise_for_status.return_value = None + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def raise_for_status(self): + if self.status_code >= 400: + raise requests.RequestException(f"HTTP {self.status_code}") + return None + + return MockResponse(json_data, status_code) + + get_responses = { + "https://api.github.com/user": create_mock_response({"login": "testuser"}), + "https://api.github.com/repos/owner/repo/commits": create_mock_response([ + {"author": {"login": "testuser"}, "sha": "abc123"}, + {"author": {"login": "testuser"}, "sha": "def456"} + ]), + } + + def get_side_effect(url, **kwargs): + return get_responses.get(url, create_mock_response({})) + + mock_get.side_effect = get_side_effect - # Mock the commits response - mock_get.side_effect = [ - type( - "Response", (), {"json": lambda: [{"author": {"login": "testuser"}}], "raise_for_status": lambda: None} - ), - type( - "Response", - (), - {"json": lambda: [{"sha": "abc123"}, {"sha": "def456"}], "raise_for_status": lambda: None}, - ), - ] - - # Mock the update response mock_patch.return_value.raise_for_status.return_value = None token = Secret.from_token("test_token") @@ -131,8 +158,7 @@ def test_run_undo(self, mock_patch, mock_get): assert result["result"] == "Successfully undid last change" - # Verify the API calls - assert mock_get.call_count == 3 + assert mock_get.call_count == 3 # One for commits, one for user info, one for last commit check 
mock_patch.assert_called_once_with( "https://api.github.com/repos/owner/repo/git/refs/heads/main", headers={ @@ -141,11 +167,11 @@ def test_run_undo(self, mock_patch, mock_get): "Authorization": "Bearer test_token", }, json={"sha": "def456", "force": True}, + timeout=10 ) @patch("requests.put") def test_run_create(self, mock_put): - # Mock the create response mock_put.return_value.raise_for_status.return_value = None token = Secret.from_token("test_token") @@ -160,7 +186,6 @@ def test_run_create(self, mock_put): assert result["result"] == "File created successfully" - # Verify the API call mock_put.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/new.txt", headers={ @@ -173,16 +198,18 @@ def test_run_create(self, mock_put): "content": "TmV3IGZpbGUgY29udGVudA==", # Base64 encoded "New file content" "branch": "main", }, + timeout=10 ) @patch("requests.get") @patch("requests.delete") def test_run_delete(self, mock_delete, mock_get): - # Mock the file content response - mock_get.return_value.json.return_value = {"sha": "abc123"} + mock_get.return_value.json.return_value = { + "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" + "sha": "abc123" + } mock_get.return_value.raise_for_status.return_value = None - # Mock the delete response mock_delete.return_value.raise_for_status.return_value = None token = Secret.from_token("test_token") @@ -197,7 +224,6 @@ def test_run_delete(self, mock_delete, mock_get): assert result["result"] == "File deleted successfully" - # Verify the API calls mock_get.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/test.txt", headers={ @@ -206,6 +232,7 @@ def test_run_delete(self, mock_delete, mock_get): "Authorization": "Bearer test_token", }, params={"ref": "main"}, + timeout=10 ) mock_delete.assert_called_once_with( @@ -216,11 +243,11 @@ def test_run_delete(self, mock_delete, mock_get): "Authorization": "Bearer test_token", }, json={"message": "Delete file", "sha": "abc123", 
"branch": "main"}, + timeout=10 ) @patch("requests.get") def test_run_error_handling(self, mock_get): - # Mock an error response mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") @@ -235,7 +262,6 @@ def test_run_error_handling(self, mock_get): assert "Error: API Error" in result["result"] - # Test with raise_on_failure=True editor = GithubFileEditor(github_token=token, raise_on_failure=True) with pytest.raises(requests.RequestException): editor.run( diff --git a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py index be5d730d0f..0c1b6cd7b7 100644 --- a/integrations/github/tests/test_issue_commenter.py +++ b/integrations/github/tests/test_issue_commenter.py @@ -11,9 +11,12 @@ class TestGithubIssueCommenter: - def test_init_default(self): + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + commenter = GithubIssueCommenter() assert commenter.github_token is not None + assert commenter.github_token.resolve_value() == "test-token" assert commenter.raise_on_failure is True assert commenter.retry_attempts == 2 @@ -24,40 +27,48 @@ def test_init_with_parameters(self): assert commenter.raise_on_failure is False assert commenter.retry_attempts == 3 - def test_to_dict(self): - token = Secret.from_token("test_token") - commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) - - result = commenter.to_dict() - - assert ( - result["type"] == "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter" + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + + token = Secret.from_env_var("ENV_VAR") + + commenter = GithubIssueCommenter( + github_token=token, + raise_on_failure=False, + retry_attempts=3 ) - assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" - assert 
result["init_parameters"]["raise_on_failure"] is False - assert result["init_parameters"]["retry_attempts"] == 3 + + data = commenter.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "retry_attempts": 3 + } + } - def test_from_dict(self): + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + data = { "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", "init_parameters": { - "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, - "retry_attempts": 3, - }, + "retry_attempts": 3 + } } commenter = GithubIssueCommenter.from_dict(data) - assert isinstance(commenter.github_token, Secret) - assert commenter.github_token.resolve_value() == "test_token" + assert commenter.github_token == Secret.from_env_var("ENV_VAR") assert commenter.raise_on_failure is False assert commenter.retry_attempts == 3 @patch("requests.post") def test_run(self, mock_post): - """Test the run method.""" - # Mock the successful response mock_post.return_value.raise_for_status.return_value = None token = Secret.from_token("test_token") @@ -67,7 +78,6 @@ def test_run(self, mock_post): assert result["success"] is True - # Verify the API call mock_post.assert_called_once_with( "https://api.github.com/repos/owner/repo/issues/123/comments", headers={ @@ -76,11 +86,11 @@ def test_run(self, mock_post): "Authorization": "Bearer test_token", }, json={"body": "Test comment"}, + timeout=10, ) @patch("requests.post") def test_run_error_handling(self, mock_post): - # Mock an error response mock_post.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") @@ 
-90,7 +100,6 @@ def test_run_error_handling(self, mock_post): assert result["success"] is False - # Test with raise_on_failure=True commenter = GithubIssueCommenter(github_token=token, raise_on_failure=True) with pytest.raises(requests.RequestException): commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") @@ -104,6 +113,5 @@ def test_parse_github_url(self): assert repo == "repo" assert issue_number == 123 - # Test with invalid URL with pytest.raises(ValueError): commenter._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_issue_viewer.py b/integrations/github/tests/test_issue_viewer.py index f25a079728..aa015d7bed 100644 --- a/integrations/github/tests/test_issue_viewer.py +++ b/integrations/github/tests/test_issue_viewer.py @@ -17,45 +17,57 @@ def test_init_default(self): assert viewer.raise_on_failure is True assert viewer.retry_attempts == 2 - def test_init_with_parameters(self): - token = Secret.from_token("test_token") + def test_init_with_parameters(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + token = Secret.from_env_var("GITHUB_TOKEN") viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) assert viewer.github_token == token assert viewer.raise_on_failure is False assert viewer.retry_attempts == 3 - def test_to_dict(self): - token = Secret.from_token("test_token") - viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) - - result = viewer.to_dict() - - assert result["type"] == "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer" - assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" - assert result["init_parameters"]["raise_on_failure"] is False - assert result["init_parameters"]["retry_attempts"] == 3 + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + + token = 
Secret.from_env_var("ENV_VAR") + + viewer = GithubIssueViewer( + github_token=token, + raise_on_failure=False, + retry_attempts=3 + ) + + data = viewer.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "retry_attempts": 3 + } + } - def test_from_dict(self): + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + data = { "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", "init_parameters": { - "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, - "retry_attempts": 3, - }, + "retry_attempts": 3 + } } viewer = GithubIssueViewer.from_dict(data) - assert isinstance(viewer.github_token, Secret) - assert viewer.github_token.resolve_value() == "test_token" + assert viewer.github_token == Secret.from_env_var("ENV_VAR") assert viewer.raise_on_failure is False assert viewer.retry_attempts == 3 @patch("requests.get") def test_run(self, mock_get): - """Test the run method.""" - # Mock the issue response mock_get.return_value.json.return_value = { "body": "Issue body", "title": "Issue title", @@ -70,7 +82,6 @@ def test_run(self, mock_get): } mock_get.return_value.raise_for_status.return_value = None - # Mock the comments response mock_get.side_effect = [ mock_get.return_value, # First call for issue type( @@ -108,12 +119,10 @@ def test_run(self, mock_get): assert result["documents"][1].meta["type"] == "comment" assert result["documents"][2].meta["type"] == "comment" - # Verify the API calls assert mock_get.call_count == 2 @patch("requests.get") def test_run_error_handling(self, mock_get): - # Mock an error response mock_get.side_effect = requests.RequestException("API 
Error") token = Secret.from_token("test_token") @@ -125,7 +134,6 @@ def test_run_error_handling(self, mock_get): assert result["documents"][0].meta["type"] == "error" assert result["documents"][0].meta["error"] is True - # Test with raise_on_failure=True viewer = GithubIssueViewer(github_token=token, raise_on_failure=True) with pytest.raises(requests.RequestException): viewer.run(url="https://github.com/owner/repo/issues/123") @@ -139,6 +147,5 @@ def test_parse_github_url(self): assert repo == "repo" assert issue_number == 123 - # Test with invalid URL with pytest.raises(ValueError): viewer._parse_github_url("https://github.com/invalid/url") diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py index 10089ba7ff..b9855a7d92 100644 --- a/integrations/github/tests/test_pr_creator.py +++ b/integrations/github/tests/test_pr_creator.py @@ -11,9 +11,12 @@ class TestGithubPRCreator: - def test_init_default(self): + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + pr_creator = GithubPRCreator() assert pr_creator.github_token is not None + assert pr_creator.github_token.resolve_value() == "test-token" assert pr_creator.raise_on_failure is True def test_init_with_parameters(self): @@ -22,85 +25,111 @@ def test_init_with_parameters(self): assert pr_creator.github_token == token assert pr_creator.raise_on_failure is False - # Test with invalid token type with pytest.raises(TypeError): GithubPRCreator(github_token="not_a_secret") - def test_to_dict(self): - token = Secret.from_token("test_token") - pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) - - result = pr_creator.to_dict() - - assert result["type"] == "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator" - assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" - assert result["init_parameters"]["raise_on_failure"] is False + def test_to_dict(self, 
monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + + token = Secret.from_env_var("ENV_VAR") + + pr_creator = GithubPRCreator( + github_token=token, + raise_on_failure=False + ) + + data = pr_creator.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False + } + } - def test_from_dict(self): + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + data = { "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", "init_parameters": { - "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, - "raise_on_failure": False, - }, + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False + } } pr_creator = GithubPRCreator.from_dict(data) - assert isinstance(pr_creator.github_token, Secret) - assert pr_creator.github_token.resolve_value() == "test_token" + assert pr_creator.github_token == Secret.from_env_var("ENV_VAR") assert pr_creator.raise_on_failure is False @patch("requests.get") @patch("requests.post") def test_run(self, mock_post, mock_get): - # Mock the authenticated user response mock_get.return_value.json.return_value = {"login": "test_user"} mock_get.return_value.raise_for_status.return_value = None - # Mock the PR creation response mock_post.return_value.json.return_value = {"number": 123} mock_post.return_value.raise_for_status.return_value = None token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token) + + with patch.object(pr_creator, '_check_fork_exists', return_value=True): + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", + body="Test body", + draft=False, + ) - result = pr_creator.run( - 
issue_url="https://github.com/owner/repo/issues/456", - title="Test PR", - branch="feature-branch", - base="main", - body="Test body", - draft=False, - ) - - assert result["result"] == "Pull request #123 created successfully and linked to issue #456" - - # Verify the API calls - mock_get.assert_called_once() - mock_post.assert_called_once() + assert result["result"] == "Pull request #123 created successfully and linked to issue #456" + + mock_get.assert_called_once() + mock_post.assert_called_once_with( + "https://api.github.com/repos/owner/repo/pulls", + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GithubPRCreator", + "Authorization": "Bearer test_token", + }, + json={ + "title": "Test PR", + "body": "Test body", + "head": "test_user:feature-branch", + "base": "main", + "draft": False, + "maintainer_can_modify": True, + }, + timeout=10, + ) @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, mock_get): - # Mock an error response + def test_run_error_handling(self, mock_post, mock_get): mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + + with patch.object(pr_creator, '_check_fork_exists', return_value=True): + result = pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main" + ) - result = pr_creator.run( - issue_url="https://github.com/owner/repo/issues/456", title="Test PR", branch="feature-branch", base="main" - ) - - assert "Error" in result["result"] + assert "Error" in result["result"] - # Test with raise_on_failure=True pr_creator = GithubPRCreator(github_token=token, raise_on_failure=True) with pytest.raises(requests.RequestException): - pr_creator.run( - issue_url="https://github.com/owner/repo/issues/456", - title="Test PR", - branch="feature-branch", - base="main", - ) + pr_creator.run( + 
issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", + ) diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index 47511a193b..a36e4d2642 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -11,7 +11,9 @@ class TestGithubRepositoryViewer: - def test_init_default(self): + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + viewer = GithubRepositoryViewer() assert viewer.github_token is None assert viewer.raise_on_failure is True @@ -30,37 +32,59 @@ def test_init_with_parameters(self): assert viewer.repo == "owner/repo" assert viewer.branch == "main" - # Test with invalid token type with pytest.raises(TypeError): GithubRepositoryViewer(github_token="not_a_secret") - def test_to_dict(self): - token = Secret.from_token("test_token") - viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=False, max_file_size=500_000) - - result = viewer.to_dict() - - assert result["github_token"]["type"] == "haystack.utils.Secret" - assert result["raise_on_failure"] is False - assert result["max_file_size"] == 500_000 + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + + token = Secret.from_env_var("ENV_VAR") + + viewer = GithubRepositoryViewer( + github_token=token, + raise_on_failure=False, + max_file_size=500_000, + repo="owner/repo", + branch="main" + ) + + data = viewer.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.repo_viewer.GithubRepositoryViewer", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "max_file_size": 500_000, + "repo": "owner/repo", + "branch": "main" + } + } - def test_from_dict(self): + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", 
"test_token") + data = { - "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, - "raise_on_failure": False, - "max_file_size": 500_000, + "type": "haystack_integrations.components.connectors.github.repo_viewer.GithubRepositoryViewer", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "max_file_size": 500_000, + "repo": "owner/repo", + "branch": "main" + } } viewer = GithubRepositoryViewer.from_dict(data) - assert isinstance(viewer.github_token, Secret) - assert viewer.github_token.resolve_value() == "test_token" + assert viewer.github_token == Secret.from_env_var("ENV_VAR") assert viewer.raise_on_failure is False assert viewer.max_file_size == 500_000 + assert viewer.repo == "owner/repo" + assert viewer.branch == "main" @patch("requests.get") def test_run_file(self, mock_get): - # Mock the file response mock_get.return_value.json.return_value = { "name": "README.md", "path": "README.md", @@ -81,7 +105,6 @@ def test_run_file(self, mock_get): assert result["documents"][0].meta["type"] == "file_content" assert result["documents"][0].meta["path"] == "README.md" - # Verify the API call mock_get.assert_called_once_with( "https://api.github.com/repos/owner/repo/contents/README.md?ref=main", headers={ @@ -89,11 +112,11 @@ def test_run_file(self, mock_get): "User-Agent": "Haystack/GithubRepositoryViewer", "Authorization": "Bearer test_token", }, + timeout=10, ) @patch("requests.get") def test_run_directory(self, mock_get): - # Mock the directory response mock_get.return_value.json.return_value = [ {"name": "docs", "path": "docs", "type": "dir", "html_url": "https://github.com/owner/repo/tree/main/docs"}, { @@ -117,7 +140,6 @@ def test_run_directory(self, mock_get): assert result["documents"][1].content == "README.md" assert result["documents"][1].meta["type"] == "file" - # Verify the API call mock_get.assert_called_once_with( 
"https://api.github.com/repos/owner/repo/contents/?ref=main", headers={ @@ -125,11 +147,11 @@ def test_run_directory(self, mock_get): "User-Agent": "Haystack/GithubRepositoryViewer", "Authorization": "Bearer test_token", }, + timeout=10, ) @patch("requests.get") def test_run_error_handling(self, mock_get): - # Mock an error response mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") @@ -140,7 +162,6 @@ def test_run_error_handling(self, mock_get): assert len(result["documents"]) == 1 assert result["documents"][0].meta["type"] == "error" - # Test with raise_on_failure=True viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=True) with pytest.raises(requests.RequestException): viewer.run(repo="owner/repo", path="README.md", branch="main") @@ -153,6 +174,5 @@ def test_parse_repo(self): assert owner == "owner" assert repo == "repo" - # Test with invalid format with pytest.raises(ValueError): viewer._parse_repo("invalid_format") diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py index 5ff9e9a67d..5e4f2c3dc2 100644 --- a/integrations/github/tests/test_repository_forker.py +++ b/integrations/github/tests/test_repository_forker.py @@ -11,9 +11,12 @@ class TestGithubRepoForker: - def test_init_default(self): + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + forker = GithubRepoForker() assert forker.github_token is not None + assert forker.github_token.resolve_value() == "test-token" assert forker.raise_on_failure is True assert forker.wait_for_completion is False assert forker.max_wait_seconds == 300 @@ -44,8 +47,11 @@ def test_init_with_parameters(self): with pytest.raises(TypeError): GithubRepoForker(github_token="not_a_secret") - def test_to_dict(self): - token = Secret.from_token("test_token") + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + + token = 
Secret.from_env_var("ENV_VAR") + forker = GithubRepoForker( github_token=token, raise_on_failure=False, @@ -53,38 +59,43 @@ def test_to_dict(self): max_wait_seconds=60, poll_interval=1, auto_sync=False, - create_branch=False, + create_branch=False ) + + data = forker.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "wait_for_completion": True, + "max_wait_seconds": 60, + "poll_interval": 1, + "auto_sync": False, + "create_branch": False + } + } - result = forker.to_dict() - - assert result["type"] == "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker" - assert result["init_parameters"]["github_token"]["type"] == "haystack.utils.Secret" - assert result["init_parameters"]["raise_on_failure"] is False - assert result["init_parameters"]["wait_for_completion"] is True - assert result["init_parameters"]["max_wait_seconds"] == 60 - assert result["init_parameters"]["poll_interval"] == 1 - assert result["init_parameters"]["auto_sync"] is False - assert result["init_parameters"]["create_branch"] is False - - def test_from_dict(self): + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test_token") + data = { "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", "init_parameters": { - "github_token": {"type": "haystack.utils.Secret", "token": "test_token"}, + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, "wait_for_completion": True, "max_wait_seconds": 60, "poll_interval": 1, "auto_sync": False, - "create_branch": False, - }, + "create_branch": False + } } forker = GithubRepoForker.from_dict(data) - assert isinstance(forker.github_token, Secret) - assert forker.github_token.resolve_value() == "test_token" + 
assert forker.github_token == Secret.from_env_var("ENV_VAR") assert forker.raise_on_failure is False assert forker.wait_for_completion is True assert forker.max_wait_seconds == 60 @@ -95,13 +106,45 @@ def test_from_dict(self): @patch("requests.get") @patch("requests.post") def test_run_create_fork(self, mock_post, mock_get): - # Mock the authenticated user response - mock_get.return_value.json.return_value = {"login": "test_user"} - mock_get.return_value.raise_for_status.return_value = None + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def raise_for_status(self): + if self.status_code >= 400: + raise requests.RequestException(f"HTTP {self.status_code}") + return None + + return MockResponse(json_data, status_code) + + get_responses = { + "https://api.github.com/user": create_mock_response({"login": "test_user"}), + "https://api.github.com/repos/test_user/repo": create_mock_response({}, status_code=404), # Fork doesn't exist + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response({"object": {"sha": "abc123"}}), + } + + def get_side_effect(url, **kwargs): + if url == "https://api.github.com/repos/test_user/repo": + if mock_get.call_count == 2: + return create_mock_response({}, status_code=404) + return create_mock_response({"default_branch": "main"}) + return get_responses.get(url, create_mock_response({"default_branch": "main"})) + + mock_get.side_effect = get_side_effect - # Mock the fork creation response - mock_post.return_value.json.return_value = {"owner": {"login": "test_user"}, "name": "repo"} - mock_post.return_value.raise_for_status.return_value = None + def post_side_effect(url, **kwargs): + if "forks" in url: + return create_mock_response({"owner": {"login": "test_user"}, "name": "repo"}) + elif "git/refs" in url: + return create_mock_response({"ref": 
"refs/heads/fix-123"}) + return create_mock_response({}) + + mock_post.side_effect = post_side_effect token = Secret.from_token("test_token") forker = GithubRepoForker(github_token=token, create_branch=True, auto_sync=False) @@ -111,23 +154,56 @@ def test_run_create_fork(self, mock_post, mock_get): assert result["repo"] == "test_user/repo" assert result["issue_branch"] == "fix-123" - # Verify the API calls - mock_get.assert_called_once() + assert mock_get.call_count == 5 # user (2x), check fork status, get default branch, get SHA + + get_calls = [call[0][0] for call in mock_get.call_args_list] + assert get_calls.count("https://api.github.com/user") == 2 # get user, check fork + assert get_calls.count("https://api.github.com/repos/test_user/repo") == 2 # check status, get default branch + assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls + + post_calls = [call[0][0] for call in mock_post.call_args_list] + assert "https://api.github.com/repos/owner/repo/forks" in post_calls + assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls assert mock_post.call_count == 2 # One for fork creation, one for branch creation @patch("requests.get") @patch("requests.post") def test_run_sync_existing_fork(self, mock_post, mock_get): - # Mock the authenticated user response - mock_get.side_effect = [ - type("Response", (), {"json": lambda: {"login": "test_user"}, "raise_for_status": lambda: None}), - type( - "Response", (), {"status_code": 200, "json": lambda: {"name": "repo"}, "raise_for_status": lambda: None} - ), - ] + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def raise_for_status(self): + if self.status_code >= 400: + raise requests.RequestException(f"HTTP {self.status_code}") + return None + + return MockResponse(json_data, status_code) + + get_responses = { + 
"https://api.github.com/user": create_mock_response({"login": "test_user"}), + "https://api.github.com/repos/test_user/repo": create_mock_response({"name": "repo", "default_branch": "main"}), + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response({"object": {"sha": "abc123"}}), + } + + def get_side_effect(url, **kwargs): + return get_responses.get(url, create_mock_response({"default_branch": "main"})) + + mock_get.side_effect = get_side_effect + + def post_side_effect(url, **kwargs): + if "merge-upstream" in url: + return create_mock_response({}) + elif "git/refs" in url: + return create_mock_response({"ref": "refs/heads/fix-123"}) + return create_mock_response({}) - # Mock the sync response - mock_post.return_value.raise_for_status.return_value = None + mock_post.side_effect = post_side_effect token = Secret.from_token("test_token") forker = GithubRepoForker(github_token=token, create_branch=True, auto_sync=True) @@ -137,14 +213,21 @@ def test_run_sync_existing_fork(self, mock_post, mock_get): assert result["repo"] == "test_user/repo" assert result["issue_branch"] == "fix-123" - # Verify the API calls - assert mock_get.call_count == 2 + assert mock_get.call_count == 5 # user, check fork, check fork status, get default branch, get SHA + + get_calls = [call[0][0] for call in mock_get.call_args_list] + assert "https://api.github.com/user" in get_calls + assert "https://api.github.com/repos/test_user/repo" in get_calls + assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls + + post_calls = [call[0][0] for call in mock_post.call_args_list] + assert "https://api.github.com/repos/test_user/repo/merge-upstream" in post_calls + assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls assert mock_post.call_count == 2 # One for sync, one for branch creation @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, mock_get): - # Mock an error response + def 
test_run_error_handling(self, mock_post, mock_get): mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") @@ -155,7 +238,6 @@ def test_run_error_handling(self, mock_get): assert result["repo"] == "" assert result["issue_branch"] is None - # Test with raise_on_failure=True forker = GithubRepoForker(github_token=token, raise_on_failure=True) with pytest.raises(requests.RequestException): forker.run(url="https://github.com/owner/repo/issues/123") @@ -169,6 +251,5 @@ def test_parse_github_url(self): assert repo == "repo" assert issue_number == "123" - # Test with invalid URL with pytest.raises(ValueError): forker._parse_github_url("https://github.com/invalid/url") From 71441765e379679c868274a85817a45ea7e68bae Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Thu, 24 Apr 2025 21:32:34 +0200 Subject: [PATCH 16/51] lint --- .../connectors/github/repo_viewer.py | 2 +- .../prompts/github/context.py | 10 +-- integrations/github/tests/test_file_editor.py | 48 ++++++------- .../github/tests/test_issue_commenter.py | 26 +++---- .../github/tests/test_issue_viewer.py | 26 +++---- integrations/github/tests/test_pr_creator.py | 55 +++++++-------- integrations/github/tests/test_repo_viewer.py | 26 +++---- .../github/tests/test_repository_forker.py | 70 ++++++++++--------- 8 files changed, 123 insertions(+), 140 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index a7bb03c888..89e6cfcf9b 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional import requests -from haystack import Document, component, default_from_dict, logging, default_to_dict +from haystack import Document, 
component, default_from_dict, default_to_dict, logging from haystack.utils import Secret, deserialize_secrets_inplace logger = logging.getLogger(__name__) diff --git a/integrations/github/src/haystack_integrations/prompts/github/context.py b/integrations/github/src/haystack_integrations/prompts/github/context.py index ff45fccde9..201477cc13 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/context.py +++ b/integrations/github/src/haystack_integrations/prompts/github/context.py @@ -31,7 +31,7 @@ - `@component`: This decorator marks a class as a component, allowing it to be used in a pipeline. - `run()`: This is a required method in every component. It accepts input arguments and returns a `dict`. The inputs can -either come from the pipeline when it’s executed, or from the output of another component when connected using +either come from the pipeline when it's executed, or from the output of another component when connected using `connect()`. The `run()` method should be compatible with the input/output definitions declared for the component. See an [Extended Example](#extended-example) below to check how it works. @@ -43,13 +43,13 @@ You can choose between three input options: -- `set_input_type`: This method defines or updates a single input socket for a component instance. It’s ideal for adding +- `set_input_type`: This method defines or updates a single input socket for a component instance. It's ideal for adding or modifying a specific input at runtime without affecting others. Use this when you need to dynamically set or modify a single input based on specific conditions. - `set_input_types`: This method allows you to define multiple input sockets at once, replacing any existing inputs. -It’s useful when you know all the inputs the component will need and want to configure them in bulk. Use this when you +It's useful when you know all the inputs the component will need and want to configure them in bulk. 
Use this when you want to define multiple inputs during initialization. -- Declaring arguments directly in the `run()` method. Use this method when the component’s inputs are static and known +- Declaring arguments directly in the `run()` method. Use this method when the component's inputs are static and known at the time of class definition. ### Outputs @@ -60,7 +60,7 @@ output names and types must match the `dict` returned by the `run()` method. Use this when the output types are static and known in advance. This decorator is cleaner and more readable for static components. - `set_output_types`: This method defines or updates multiple output sockets for a component instance at runtime. -It’s useful when you need flexibility in configuring outputs dynamically. Use this when the output types need to be set +It's useful when you need flexibility in configuring outputs dynamically. Use this when the output types need to be set at runtime for greater flexibility. # Short Example diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py index 7c1605d0d0..7d1a739bdf 100644 --- a/integrations/github/tests/test_file_editor.py +++ b/integrations/github/tests/test_file_editor.py @@ -13,7 +13,7 @@ class TestGithubFileEditor: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - + editor = GithubFileEditor() assert editor.github_token is not None assert editor.github_token.resolve_value() == "test-token" @@ -34,26 +34,21 @@ def test_init_with_parameters(self): def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + token = Secret.from_env_var("ENV_VAR") - - editor = GithubFileEditor( - github_token=token, - repo="owner/repo", - branch="feature", - raise_on_failure=False - ) - + + editor = GithubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) + data = editor.to_dict() - + assert data == { "type": 
"haystack_integrations.components.connectors.github.file_editor.GithubFileEditor", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "repo": "owner/repo", "branch": "feature", - "raise_on_failure": False - } + "raise_on_failure": False, + }, } def test_from_dict(self, monkeypatch): @@ -64,8 +59,8 @@ def test_from_dict(self, monkeypatch): "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "repo": "owner/repo", "branch": "feature", - "raise_on_failure": False - } + "raise_on_failure": False, + }, } editor = GithubFileEditor.from_dict(data) @@ -129,20 +124,19 @@ def json(self): def raise_for_status(self): if self.status_code >= 400: - raise requests.RequestException(f"HTTP {self.status_code}") - return None + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) return MockResponse(json_data, status_code) get_responses = { "https://api.github.com/user": create_mock_response({"login": "testuser"}), - "https://api.github.com/repos/owner/repo/commits": create_mock_response([ - {"author": {"login": "testuser"}, "sha": "abc123"}, - {"author": {"login": "testuser"}, "sha": "def456"} - ]), + "https://api.github.com/repos/owner/repo/commits": create_mock_response( + [{"author": {"login": "testuser"}, "sha": "abc123"}, {"author": {"login": "testuser"}, "sha": "def456"}] + ), } - def get_side_effect(url, **kwargs): + def get_side_effect(url, **_): return get_responses.get(url, create_mock_response({})) mock_get.side_effect = get_side_effect @@ -167,7 +161,7 @@ def get_side_effect(url, **kwargs): "Authorization": "Bearer test_token", }, json={"sha": "def456", "force": True}, - timeout=10 + timeout=10, ) @patch("requests.put") @@ -198,7 +192,7 @@ def test_run_create(self, mock_put): "content": "TmV3IGZpbGUgY29udGVudA==", # Base64 encoded "New file content" "branch": "main", }, - timeout=10 + timeout=10, ) @patch("requests.get") @@ -206,7 +200,7 @@ def 
test_run_create(self, mock_put): def test_run_delete(self, mock_delete, mock_get): mock_get.return_value.json.return_value = { "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" - "sha": "abc123" + "sha": "abc123", } mock_get.return_value.raise_for_status.return_value = None @@ -232,7 +226,7 @@ def test_run_delete(self, mock_delete, mock_get): "Authorization": "Bearer test_token", }, params={"ref": "main"}, - timeout=10 + timeout=10, ) mock_delete.assert_called_once_with( @@ -243,7 +237,7 @@ def test_run_delete(self, mock_delete, mock_get): "Authorization": "Bearer test_token", }, json={"message": "Delete file", "sha": "abc123", "branch": "main"}, - timeout=10 + timeout=10, ) @patch("requests.get") diff --git a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py index 0c1b6cd7b7..d487a17a8e 100644 --- a/integrations/github/tests/test_issue_commenter.py +++ b/integrations/github/tests/test_issue_commenter.py @@ -13,7 +13,7 @@ class TestGithubIssueCommenter: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - + commenter = GithubIssueCommenter() assert commenter.github_token is not None assert commenter.github_token.resolve_value() == "test-token" @@ -29,36 +29,32 @@ def test_init_with_parameters(self): def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + token = Secret.from_env_var("ENV_VAR") - - commenter = GithubIssueCommenter( - github_token=token, - raise_on_failure=False, - retry_attempts=3 - ) - + + commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) + data = commenter.to_dict() - + assert data == { "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, - "retry_attempts": 3 - } + "retry_attempts": 3, + }, } def 
test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + data = { "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, - "retry_attempts": 3 - } + "retry_attempts": 3, + }, } commenter = GithubIssueCommenter.from_dict(data) diff --git a/integrations/github/tests/test_issue_viewer.py b/integrations/github/tests/test_issue_viewer.py index aa015d7bed..a23d0e4d36 100644 --- a/integrations/github/tests/test_issue_viewer.py +++ b/integrations/github/tests/test_issue_viewer.py @@ -19,7 +19,7 @@ def test_init_default(self): def test_init_with_parameters(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - + token = Secret.from_env_var("GITHUB_TOKEN") viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) assert viewer.github_token == token @@ -28,36 +28,32 @@ def test_init_with_parameters(self, monkeypatch): def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + token = Secret.from_env_var("ENV_VAR") - - viewer = GithubIssueViewer( - github_token=token, - raise_on_failure=False, - retry_attempts=3 - ) - + + viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) + data = viewer.to_dict() - + assert data == { "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, - "retry_attempts": 3 - } + "retry_attempts": 3, + }, } def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + data = { "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, 
"raise_on_failure": False, - "retry_attempts": 3 - } + "retry_attempts": 3, + }, } viewer = GithubIssueViewer.from_dict(data) diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py index b9855a7d92..cc8bbb1196 100644 --- a/integrations/github/tests/test_pr_creator.py +++ b/integrations/github/tests/test_pr_creator.py @@ -13,7 +13,7 @@ class TestGithubPRCreator: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - + pr_creator = GithubPRCreator() assert pr_creator.github_token is not None assert pr_creator.github_token.resolve_value() == "test-token" @@ -30,33 +30,30 @@ def test_init_with_parameters(self): def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + token = Secret.from_env_var("ENV_VAR") - - pr_creator = GithubPRCreator( - github_token=token, - raise_on_failure=False - ) - + + pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + data = pr_creator.to_dict() - + assert data == { "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, - "raise_on_failure": False - } + "raise_on_failure": False, + }, } def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + data = { "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, - "raise_on_failure": False - } + "raise_on_failure": False, + }, } pr_creator = GithubPRCreator.from_dict(data) @@ -75,8 +72,8 @@ def test_run(self, mock_post, mock_get): token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token) - - with patch.object(pr_creator, '_check_fork_exists', return_value=True): + + with patch.object(pr_creator, "_check_fork_exists", return_value=True): 
result = pr_creator.run( issue_url="https://github.com/owner/repo/issues/456", title="Test PR", @@ -109,27 +106,27 @@ def test_run(self, mock_post, mock_get): @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, mock_post, mock_get): + def test_run_error_handling(self, _, mock_get): mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) - - with patch.object(pr_creator, '_check_fork_exists', return_value=True): + + with patch.object(pr_creator, "_check_fork_exists", return_value=True): result = pr_creator.run( - issue_url="https://github.com/owner/repo/issues/456", - title="Test PR", - branch="feature-branch", - base="main" + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", ) assert "Error" in result["result"] pr_creator = GithubPRCreator(github_token=token, raise_on_failure=True) with pytest.raises(requests.RequestException): - pr_creator.run( - issue_url="https://github.com/owner/repo/issues/456", - title="Test PR", - branch="feature-branch", - base="main", - ) + pr_creator.run( + issue_url="https://github.com/owner/repo/issues/456", + title="Test PR", + branch="feature-branch", + base="main", + ) diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index a36e4d2642..a68de3dec8 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -13,7 +13,7 @@ class TestGithubRepositoryViewer: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - + viewer = GithubRepositoryViewer() assert viewer.github_token is None assert viewer.raise_on_failure is True @@ -37,19 +37,15 @@ def test_init_with_parameters(self): def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + token = 
Secret.from_env_var("ENV_VAR") - + viewer = GithubRepositoryViewer( - github_token=token, - raise_on_failure=False, - max_file_size=500_000, - repo="owner/repo", - branch="main" + github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" ) - + data = viewer.to_dict() - + assert data == { "type": "haystack_integrations.components.connectors.github.repo_viewer.GithubRepositoryViewer", "init_parameters": { @@ -57,13 +53,13 @@ def test_to_dict(self, monkeypatch): "raise_on_failure": False, "max_file_size": 500_000, "repo": "owner/repo", - "branch": "main" - } + "branch": "main", + }, } def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + data = { "type": "haystack_integrations.components.connectors.github.repo_viewer.GithubRepositoryViewer", "init_parameters": { @@ -71,8 +67,8 @@ def test_from_dict(self, monkeypatch): "raise_on_failure": False, "max_file_size": 500_000, "repo": "owner/repo", - "branch": "main" - } + "branch": "main", + }, } viewer = GithubRepositoryViewer.from_dict(data) diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py index 5e4f2c3dc2..dde362fdea 100644 --- a/integrations/github/tests/test_repository_forker.py +++ b/integrations/github/tests/test_repository_forker.py @@ -13,7 +13,7 @@ class TestGithubRepoForker: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - + forker = GithubRepoForker() assert forker.github_token is not None assert forker.github_token.resolve_value() == "test-token" @@ -49,9 +49,9 @@ def test_init_with_parameters(self): def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + token = Secret.from_env_var("ENV_VAR") - + forker = GithubRepoForker( github_token=token, raise_on_failure=False, @@ -59,11 +59,11 @@ def test_to_dict(self, monkeypatch): max_wait_seconds=60, poll_interval=1, auto_sync=False, - 
create_branch=False + create_branch=False, ) - + data = forker.to_dict() - + assert data == { "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", "init_parameters": { @@ -73,13 +73,13 @@ def test_to_dict(self, monkeypatch): "max_wait_seconds": 60, "poll_interval": 1, "auto_sync": False, - "create_branch": False - } + "create_branch": False, + }, } def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test_token") - + data = { "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", "init_parameters": { @@ -89,8 +89,8 @@ def test_from_dict(self, monkeypatch): "max_wait_seconds": 60, "poll_interval": 1, "auto_sync": False, - "create_branch": False - } + "create_branch": False, + }, } forker = GithubRepoForker.from_dict(data) @@ -117,31 +117,33 @@ def json(self): def raise_for_status(self): if self.status_code >= 400: - raise requests.RequestException(f"HTTP {self.status_code}") - return None + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) return MockResponse(json_data, status_code) get_responses = { "https://api.github.com/user": create_mock_response({"login": "test_user"}), - "https://api.github.com/repos/test_user/repo": create_mock_response({}, status_code=404), # Fork doesn't exist - "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response({"object": {"sha": "abc123"}}), + "https://api.github.com/repos/test_user/repo": create_mock_response( + {}, status_code=404 + ), # Fork doesn't exist + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( + {"object": {"sha": "abc123"}} + ), } - def get_side_effect(url, **kwargs): + def get_side_effect(url, **_): if url == "https://api.github.com/repos/test_user/repo": if mock_get.call_count == 2: - return create_mock_response({}, status_code=404) + return create_mock_response({}, status_code=404) # Fork doesn't exist 
return create_mock_response({"default_branch": "main"}) return get_responses.get(url, create_mock_response({"default_branch": "main"})) mock_get.side_effect = get_side_effect - def post_side_effect(url, **kwargs): + def post_side_effect(url, **_): if "forks" in url: return create_mock_response({"owner": {"login": "test_user"}, "name": "repo"}) - elif "git/refs" in url: - return create_mock_response({"ref": "refs/heads/fix-123"}) return create_mock_response({}) mock_post.side_effect = post_side_effect @@ -155,12 +157,12 @@ def post_side_effect(url, **kwargs): assert result["issue_branch"] == "fix-123" assert mock_get.call_count == 5 # user (2x), check fork status, get default branch, get SHA - + get_calls = [call[0][0] for call in mock_get.call_args_list] assert get_calls.count("https://api.github.com/user") == 2 # get user, check fork assert get_calls.count("https://api.github.com/repos/test_user/repo") == 2 # check status, get default branch assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls - + post_calls = [call[0][0] for call in mock_post.call_args_list] assert "https://api.github.com/repos/owner/repo/forks" in post_calls assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls @@ -180,27 +182,29 @@ def json(self): def raise_for_status(self): if self.status_code >= 400: - raise requests.RequestException(f"HTTP {self.status_code}") - return None + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) return MockResponse(json_data, status_code) get_responses = { "https://api.github.com/user": create_mock_response({"login": "test_user"}), - "https://api.github.com/repos/test_user/repo": create_mock_response({"name": "repo", "default_branch": "main"}), - "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response({"object": {"sha": "abc123"}}), + "https://api.github.com/repos/test_user/repo": create_mock_response( + {"name": "repo", 
"default_branch": "main"} + ), + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( + {"object": {"sha": "abc123"}} + ), } - def get_side_effect(url, **kwargs): + def get_side_effect(url, **_): return get_responses.get(url, create_mock_response({"default_branch": "main"})) mock_get.side_effect = get_side_effect - def post_side_effect(url, **kwargs): + def post_side_effect(url, **_): if "merge-upstream" in url: return create_mock_response({}) - elif "git/refs" in url: - return create_mock_response({"ref": "refs/heads/fix-123"}) return create_mock_response({}) mock_post.side_effect = post_side_effect @@ -214,12 +218,12 @@ def post_side_effect(url, **kwargs): assert result["issue_branch"] == "fix-123" assert mock_get.call_count == 5 # user, check fork, check fork status, get default branch, get SHA - + get_calls = [call[0][0] for call in mock_get.call_args_list] assert "https://api.github.com/user" in get_calls assert "https://api.github.com/repos/test_user/repo" in get_calls assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls - + post_calls = [call[0][0] for call in mock_post.call_args_list] assert "https://api.github.com/repos/test_user/repo/merge-upstream" in post_calls assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls @@ -227,7 +231,7 @@ def post_side_effect(url, **kwargs): @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, mock_post, mock_get): + def test_run_error_handling(self, _, mock_get): mock_get.side_effect = requests.RequestException("API Error") token = Secret.from_token("test_token") From b4a375d4ecd00586091d81328ca8e5f06680df4a Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 25 Apr 2025 08:16:26 +0200 Subject: [PATCH 17/51] fix pydoc and extend init files --- integrations/github/pydoc/config.yml | 12 +++++------ .../components/connectors/github/__init__.py | 20 +++++++++++++++++++ .../prompts/github/__init__.py | 14 
++++++++++++- 3 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 integrations/github/src/haystack_integrations/components/connectors/github/__init__.py diff --git a/integrations/github/pydoc/config.yml b/integrations/github/pydoc/config.yml index 83694982ea..ae7fb568f5 100644 --- a/integrations/github/pydoc/config.yml +++ b/integrations/github/pydoc/config.yml @@ -2,12 +2,12 @@ loaders: - type: haystack_pydoc_tools.loaders.CustomPythonLoader search_path: [../src] modules: [ - "haystack_integrations.components.connectors.file_editor", - "haystack_integrations.components.connectors.issue_commenter", - "haystack_integrations.components.connectors.issue_viewer", - "haystack_integrations.components.connectors.pr_creator", - "haystack_integrations.components.connectors.repo_viewer", - "haystack_integrations.components.connectors.repository_forker", + "haystack_integrations.components.connectors.github.file_editor", + "haystack_integrations.components.connectors.github.issue_commenter", + "haystack_integrations.components.connectors.github.issue_viewer", + "haystack_integrations.components.connectors.github.pr_creator", + "haystack_integrations.components.connectors.github.repo_viewer", + "haystack_integrations.components.connectors.github.repository_forker", ] ignore_when_discovered: ["__init__"] processors: diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py new file mode 100644 index 0000000000..86ec5c8853 --- /dev/null +++ b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from .file_editor import Command, GithubFileEditor +from .issue_commenter import GithubIssueCommenter +from .issue_viewer import GithubIssueViewer +from .pr_creator import GithubPRCreator +from 
.repo_viewer import GithubRepositoryViewer +from .repository_forker import GithubRepoForker + +__all__ = [ + "Command", + "GithubFileEditor", + "GithubIssueCommenter", + "GithubIssueViewer", + "GithubPRCreator", + "GithubRepoForker", + "GithubRepositoryViewer", +] diff --git a/integrations/github/src/haystack_integrations/prompts/github/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py index 99f69f7326..966808a066 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/__init__.py +++ b/integrations/github/src/haystack_integrations/prompts/github/__init__.py @@ -2,7 +2,19 @@ # # SPDX-License-Identifier: Apache-2.0 +from .comment_tool import comment_prompt, comment_schema +from .file_editor_tool import file_editor_prompt, file_editor_schema +from .pr_system_prompt import system_prompt as pr_system_prompt from .repo_viewer_tool import repo_viewer_prompt, repo_viewer_schema from .system_prompt import issue_prompt -__all__ = ["issue_prompt", "repo_viewer_prompt", "repo_viewer_schema"] +__all__ = [ + "comment_prompt", + "comment_schema", + "file_editor_prompt", + "file_editor_schema", + "issue_prompt", + "pr_system_prompt", + "repo_viewer_prompt", + "repo_viewer_schema", +] From 2b8bc14be819478fefeb2760f1ad54ac269a090e Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 25 Apr 2025 08:26:40 +0200 Subject: [PATCH 18/51] Add integration:github to labeler.yml --- .github/labeler.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/labeler.yml b/.github/labeler.yml index 985e7cc1f7..ff7e8f133c 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -39,6 +39,11 @@ integration:fastembed: - any-glob-to-any-file: "integrations/fastembed/**/*" - any-glob-to-any-file: ".github/workflows/fastembed.yml" +integration:github: + - changed-files: + - any-glob-to-any-file: "integrations/github/**/*" + - any-glob-to-any-file: ".github/workflows/github.yml" + integration:google-ai: - changed-files: - 
any-glob-to-any-file: "integrations/google_ai/**/*" From 8480832e88852ce364c2045a8bd14ab005f48371 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 25 Apr 2025 08:49:22 +0200 Subject: [PATCH 19/51] unify how we set GITHUB_TOKEN in tests --- integrations/github/tests/test_file_editor.py | 53 ++++++++++--------- .../github/tests/test_issue_commenter.py | 31 ++++++----- .../github/tests/test_issue_viewer.py | 31 ++++++----- integrations/github/tests/test_pr_creator.py | 24 +++++---- integrations/github/tests/test_repo_viewer.py | 38 ++++++------- .../github/tests/test_repository_forker.py | 36 +++++++------ 6 files changed, 117 insertions(+), 96 deletions(-) diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py index 7d1a739bdf..51ed2167de 100644 --- a/integrations/github/tests/test_file_editor.py +++ b/integrations/github/tests/test_file_editor.py @@ -22,7 +22,7 @@ def test_init_default(self, monkeypatch): assert editor.raise_on_failure is True def test_init_with_parameters(self): - token = Secret.from_token("test_token") + token = Secret.from_token("test-token") editor = GithubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) assert editor.github_token == token assert editor.default_repo == "owner/repo" @@ -33,7 +33,7 @@ def test_init_with_parameters(self): GithubFileEditor(github_token="not_a_secret") def test_to_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") @@ -52,7 +52,7 @@ def test_to_dict(self, monkeypatch): } def test_from_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") data = { "type": "haystack_integrations.components.connectors.github.file_editor.GithubFileEditor", "init_parameters": { @@ -72,7 +72,9 @@ def test_from_dict(self, monkeypatch): @patch("requests.get") 
@patch("requests.put") - def test_run_edit(self, mock_put, mock_get): + def test_run_edit(self, mock_put, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.return_value.json.return_value = { "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" "sha": "abc123", @@ -80,8 +82,7 @@ def test_run_edit(self, mock_put, mock_get): mock_get.return_value.raise_for_status.return_value = None mock_put.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - editor = GithubFileEditor(github_token=token) + editor = GithubFileEditor() result = editor.run( command=Command.EDIT, @@ -97,7 +98,7 @@ def test_run_edit(self, mock_put, mock_get): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", - "Authorization": "Bearer test_token", + "Authorization": "Bearer test-token", }, params={"ref": "main"}, timeout=10, @@ -112,7 +113,9 @@ def test_run_edit(self, mock_put, mock_get): @patch("requests.get") @patch("requests.patch") - def test_run_undo(self, mock_patch, mock_get): + def test_run_undo(self, mock_patch, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + def create_mock_response(json_data, status_code=200): class MockResponse: def __init__(self, data, code): @@ -143,8 +146,7 @@ def get_side_effect(url, **_): mock_patch.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - editor = GithubFileEditor(github_token=token) + editor = GithubFileEditor() result = editor.run( command=Command.UNDO, payload={"message": "Undo last change"}, repo="owner/repo", branch="main" @@ -158,18 +160,19 @@ def get_side_effect(url, **_): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", - "Authorization": "Bearer test_token", + "Authorization": "Bearer test-token", }, json={"sha": "def456", "force": True}, timeout=10, ) @patch("requests.put") - def test_run_create(self, 
mock_put): + def test_run_create(self, mock_put, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_put.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - editor = GithubFileEditor(github_token=token) + editor = GithubFileEditor() result = editor.run( command=Command.CREATE, @@ -185,7 +188,7 @@ def test_run_create(self, mock_put): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", - "Authorization": "Bearer test_token", + "Authorization": "Bearer test-token", }, json={ "message": "Create new file", @@ -197,7 +200,9 @@ def test_run_create(self, mock_put): @patch("requests.get") @patch("requests.delete") - def test_run_delete(self, mock_delete, mock_get): + def test_run_delete(self, mock_delete, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.return_value.json.return_value = { "content": "SGVsbG8gV29ybGQ=", # Base64 encoded "Hello World" "sha": "abc123", @@ -206,8 +211,7 @@ def test_run_delete(self, mock_delete, mock_get): mock_delete.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - editor = GithubFileEditor(github_token=token) + editor = GithubFileEditor() result = editor.run( command=Command.DELETE, @@ -223,7 +227,7 @@ def test_run_delete(self, mock_delete, mock_get): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", - "Authorization": "Bearer test_token", + "Authorization": "Bearer test-token", }, params={"ref": "main"}, timeout=10, @@ -234,18 +238,19 @@ def test_run_delete(self, mock_delete, mock_get): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubFileEditor", - "Authorization": "Bearer test_token", + "Authorization": "Bearer test-token", }, json={"message": "Delete file", "sha": "abc123", "branch": "main"}, timeout=10, ) @patch("requests.get") - def test_run_error_handling(self, mock_get): 
+ def test_run_error_handling(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.side_effect = requests.RequestException("API Error") - token = Secret.from_token("test_token") - editor = GithubFileEditor(github_token=token, raise_on_failure=False) + editor = GithubFileEditor(raise_on_failure=False) result = editor.run( command=Command.EDIT, @@ -256,7 +261,7 @@ def test_run_error_handling(self, mock_get): assert "Error: API Error" in result["result"] - editor = GithubFileEditor(github_token=token, raise_on_failure=True) + editor = GithubFileEditor(raise_on_failure=True) with pytest.raises(requests.RequestException): editor.run( command=Command.EDIT, diff --git a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py index d487a17a8e..d71440c3fd 100644 --- a/integrations/github/tests/test_issue_commenter.py +++ b/integrations/github/tests/test_issue_commenter.py @@ -21,14 +21,14 @@ def test_init_default(self, monkeypatch): assert commenter.retry_attempts == 2 def test_init_with_parameters(self): - token = Secret.from_token("test_token") + token = Secret.from_token("test-token") commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) assert commenter.github_token == token assert commenter.raise_on_failure is False assert commenter.retry_attempts == 3 def test_to_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") @@ -46,7 +46,7 @@ def test_to_dict(self, monkeypatch): } def test_from_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") data = { "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", @@ -64,11 +64,12 @@ def test_from_dict(self, monkeypatch): assert commenter.retry_attempts == 3 @patch("requests.post") - def test_run(self, 
mock_post): + def test_run(self, mock_post, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_post.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - commenter = GithubIssueCommenter(github_token=token) + commenter = GithubIssueCommenter() result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") @@ -79,30 +80,32 @@ def test_run(self, mock_post): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubIssueCommenter", - "Authorization": "Bearer test_token", + "Authorization": "Bearer test-token", }, json={"body": "Test comment"}, timeout=10, ) @patch("requests.post") - def test_run_error_handling(self, mock_post): + def test_run_error_handling(self, mock_post, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_post.side_effect = requests.RequestException("API Error") - token = Secret.from_token("test_token") - commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False) + commenter = GithubIssueCommenter(raise_on_failure=False) result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") assert result["success"] is False - commenter = GithubIssueCommenter(github_token=token, raise_on_failure=True) + commenter = GithubIssueCommenter(raise_on_failure=True) with pytest.raises(requests.RequestException): commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") - def test_parse_github_url(self): - token = Secret.from_token("test_token") - commenter = GithubIssueCommenter(github_token=token) + def test_parse_github_url(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + commenter = GithubIssueCommenter() owner, repo, issue_number = commenter._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" diff --git a/integrations/github/tests/test_issue_viewer.py 
b/integrations/github/tests/test_issue_viewer.py index a23d0e4d36..d7711c5446 100644 --- a/integrations/github/tests/test_issue_viewer.py +++ b/integrations/github/tests/test_issue_viewer.py @@ -11,7 +11,9 @@ class TestGithubIssueViewer: - def test_init_default(self): + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + viewer = GithubIssueViewer() assert viewer.github_token is None assert viewer.raise_on_failure is True @@ -27,7 +29,7 @@ def test_init_with_parameters(self, monkeypatch): assert viewer.retry_attempts == 3 def test_to_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") @@ -45,7 +47,7 @@ def test_to_dict(self, monkeypatch): } def test_from_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") data = { "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", @@ -63,7 +65,9 @@ def test_from_dict(self, monkeypatch): assert viewer.retry_attempts == 3 @patch("requests.get") - def test_run(self, mock_get): + def test_run(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.return_value.json.return_value = { "body": "Issue body", "title": "Issue title", @@ -105,8 +109,7 @@ def test_run(self, mock_get): ), ] - token = Secret.from_token("test_token") - viewer = GithubIssueViewer(github_token=token) + viewer = GithubIssueViewer() result = viewer.run(url="https://github.com/owner/repo/issues/123") @@ -118,11 +121,12 @@ def test_run(self, mock_get): assert mock_get.call_count == 2 @patch("requests.get") - def test_run_error_handling(self, mock_get): + def test_run_error_handling(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.side_effect = requests.RequestException("API Error") - token = Secret.from_token("test_token") - viewer = 
GithubIssueViewer(github_token=token, raise_on_failure=False) + viewer = GithubIssueViewer(raise_on_failure=False) result = viewer.run(url="https://github.com/owner/repo/issues/123") @@ -130,13 +134,14 @@ def test_run_error_handling(self, mock_get): assert result["documents"][0].meta["type"] == "error" assert result["documents"][0].meta["error"] is True - viewer = GithubIssueViewer(github_token=token, raise_on_failure=True) + viewer = GithubIssueViewer(raise_on_failure=True) with pytest.raises(requests.RequestException): viewer.run(url="https://github.com/owner/repo/issues/123") - def test_parse_github_url(self): - token = Secret.from_token("test_token") - viewer = GithubIssueViewer(github_token=token) + def test_parse_github_url(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + viewer = GithubIssueViewer() owner, repo, issue_number = viewer._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py index cc8bbb1196..085c2328d9 100644 --- a/integrations/github/tests/test_pr_creator.py +++ b/integrations/github/tests/test_pr_creator.py @@ -20,7 +20,7 @@ def test_init_default(self, monkeypatch): assert pr_creator.raise_on_failure is True def test_init_with_parameters(self): - token = Secret.from_token("test_token") + token = Secret.from_token("test-token") pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) assert pr_creator.github_token == token assert pr_creator.raise_on_failure is False @@ -29,7 +29,7 @@ def test_init_with_parameters(self): GithubPRCreator(github_token="not_a_secret") def test_to_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") @@ -46,7 +46,7 @@ def test_to_dict(self, monkeypatch): } def test_from_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") 
+ monkeypatch.setenv("ENV_VAR", "test-token") data = { "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", @@ -63,15 +63,16 @@ def test_from_dict(self, monkeypatch): @patch("requests.get") @patch("requests.post") - def test_run(self, mock_post, mock_get): + def test_run(self, mock_post, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.return_value.json.return_value = {"login": "test_user"} mock_get.return_value.raise_for_status.return_value = None mock_post.return_value.json.return_value = {"number": 123} mock_post.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - pr_creator = GithubPRCreator(github_token=token) + pr_creator = GithubPRCreator() with patch.object(pr_creator, "_check_fork_exists", return_value=True): result = pr_creator.run( @@ -91,7 +92,7 @@ def test_run(self, mock_post, mock_get): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubPRCreator", - "Authorization": "Bearer test_token", + "Authorization": "Bearer test-token", }, json={ "title": "Test PR", @@ -106,11 +107,12 @@ def test_run(self, mock_post, mock_get): @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, _, mock_get): + def test_run_error_handling(self, _, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.side_effect = requests.RequestException("API Error") - token = Secret.from_token("test_token") - pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + pr_creator = GithubPRCreator(raise_on_failure=False) with patch.object(pr_creator, "_check_fork_exists", return_value=True): result = pr_creator.run( @@ -122,7 +124,7 @@ def test_run_error_handling(self, _, mock_get): assert "Error" in result["result"] - pr_creator = GithubPRCreator(github_token=token, raise_on_failure=True) + pr_creator = GithubPRCreator(raise_on_failure=True) with 
pytest.raises(requests.RequestException): pr_creator.run( issue_url="https://github.com/owner/repo/issues/456", diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index a68de3dec8..be1f0b7882 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -22,7 +22,7 @@ def test_init_default(self, monkeypatch): assert viewer.branch is None def test_init_with_parameters(self): - token = Secret.from_token("test_token") + token = Secret.from_token("test-token") viewer = GithubRepositoryViewer( github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" ) @@ -36,7 +36,7 @@ def test_init_with_parameters(self): GithubRepositoryViewer(github_token="not_a_secret") def test_to_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") @@ -58,7 +58,7 @@ def test_to_dict(self, monkeypatch): } def test_from_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") data = { "type": "haystack_integrations.components.connectors.github.repo_viewer.GithubRepositoryViewer", @@ -80,7 +80,9 @@ def test_from_dict(self, monkeypatch): assert viewer.branch == "main" @patch("requests.get") - def test_run_file(self, mock_get): + def test_run_file(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.return_value.json.return_value = { "name": "README.md", "path": "README.md", @@ -91,8 +93,7 @@ def test_run_file(self, mock_get): } mock_get.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - viewer = GithubRepositoryViewer(github_token=token) + viewer = GithubRepositoryViewer() result = viewer.run(repo="owner/repo", path="README.md", branch="main") @@ -106,13 +107,14 @@ def test_run_file(self, mock_get): headers={ 
"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepositoryViewer", - "Authorization": "Bearer test_token", }, timeout=10, ) @patch("requests.get") - def test_run_directory(self, mock_get): + def test_run_directory(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.return_value.json.return_value = [ {"name": "docs", "path": "docs", "type": "dir", "html_url": "https://github.com/owner/repo/tree/main/docs"}, { @@ -125,8 +127,7 @@ def test_run_directory(self, mock_get): ] mock_get.return_value.raise_for_status.return_value = None - token = Secret.from_token("test_token") - viewer = GithubRepositoryViewer(github_token=token) + viewer = GithubRepositoryViewer() result = viewer.run(repo="owner/repo", path="", branch="main") @@ -141,30 +142,31 @@ def test_run_directory(self, mock_get): headers={ "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepositoryViewer", - "Authorization": "Bearer test_token", }, timeout=10, ) @patch("requests.get") - def test_run_error_handling(self, mock_get): + def test_run_error_handling(self, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.side_effect = requests.RequestException("API Error") - token = Secret.from_token("test_token") - viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=False) + viewer = GithubRepositoryViewer(raise_on_failure=False) result = viewer.run(repo="owner/repo", path="README.md", branch="main") assert len(result["documents"]) == 1 assert result["documents"][0].meta["type"] == "error" - viewer = GithubRepositoryViewer(github_token=token, raise_on_failure=True) + viewer = GithubRepositoryViewer(raise_on_failure=True) with pytest.raises(requests.RequestException): viewer.run(repo="owner/repo", path="README.md", branch="main") - def test_parse_repo(self): - token = Secret.from_token("test_token") - viewer = GithubRepositoryViewer(github_token=token) + def test_parse_repo(self, 
monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + viewer = GithubRepositoryViewer() owner, repo = viewer._parse_repo("owner/repo") assert owner == "owner" diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py index dde362fdea..0e7d7015bd 100644 --- a/integrations/github/tests/test_repository_forker.py +++ b/integrations/github/tests/test_repository_forker.py @@ -25,7 +25,7 @@ def test_init_default(self, monkeypatch): assert forker.create_branch is True def test_init_with_parameters(self): - token = Secret.from_token("test_token") + token = Secret.from_token("test-token") forker = GithubRepoForker( github_token=token, raise_on_failure=False, @@ -48,7 +48,7 @@ def test_init_with_parameters(self): GithubRepoForker(github_token="not_a_secret") def test_to_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") @@ -78,7 +78,7 @@ def test_to_dict(self, monkeypatch): } def test_from_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test_token") + monkeypatch.setenv("ENV_VAR", "test-token") data = { "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", @@ -105,7 +105,9 @@ def test_from_dict(self, monkeypatch): @patch("requests.get") @patch("requests.post") - def test_run_create_fork(self, mock_post, mock_get): + def test_run_create_fork(self, mock_post, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + def create_mock_response(json_data, status_code=200): class MockResponse: def __init__(self, data, code): @@ -148,8 +150,7 @@ def post_side_effect(url, **_): mock_post.side_effect = post_side_effect - token = Secret.from_token("test_token") - forker = GithubRepoForker(github_token=token, create_branch=True, auto_sync=False) + forker = GithubRepoForker(create_branch=True, auto_sync=False) result = 
forker.run(url="https://github.com/owner/repo/issues/123") @@ -170,7 +171,9 @@ def post_side_effect(url, **_): @patch("requests.get") @patch("requests.post") - def test_run_sync_existing_fork(self, mock_post, mock_get): + def test_run_sync_existing_fork(self, mock_post, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + def create_mock_response(json_data, status_code=200): class MockResponse: def __init__(self, data, code): @@ -209,8 +212,7 @@ def post_side_effect(url, **_): mock_post.side_effect = post_side_effect - token = Secret.from_token("test_token") - forker = GithubRepoForker(github_token=token, create_branch=True, auto_sync=True) + forker = GithubRepoForker(create_branch=True, auto_sync=True) result = forker.run(url="https://github.com/owner/repo/issues/123") @@ -231,24 +233,26 @@ def post_side_effect(url, **_): @patch("requests.get") @patch("requests.post") - def test_run_error_handling(self, _, mock_get): + def test_run_error_handling(self, _, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + mock_get.side_effect = requests.RequestException("API Error") - token = Secret.from_token("test_token") - forker = GithubRepoForker(github_token=token, raise_on_failure=False) + forker = GithubRepoForker(raise_on_failure=False) result = forker.run(url="https://github.com/owner/repo/issues/123") assert result["repo"] == "" assert result["issue_branch"] is None - forker = GithubRepoForker(github_token=token, raise_on_failure=True) + forker = GithubRepoForker(raise_on_failure=True) with pytest.raises(requests.RequestException): forker.run(url="https://github.com/owner/repo/issues/123") - def test_parse_github_url(self): - token = Secret.from_token("test_token") - forker = GithubRepoForker(github_token=token) + def test_parse_github_url(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + forker = GithubRepoForker() owner, repo, issue_number = 
forker._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" From ca977a159ccfd8c1b6d0a52ef5f0ac67f4cda620 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 25 Apr 2025 11:13:43 +0200 Subject: [PATCH 20/51] fix 3 usage examples. 3 remaining --- .../components/connectors/github/file_editor.py | 2 +- .../connectors/github/issue_commenter.py | 5 +++-- .../connectors/github/issue_viewer.py | 7 +++---- .../components/connectors/github/pr_creator.py | 2 +- .../components/connectors/github/repo_viewer.py | 17 +++++++---------- .../connectors/github/repository_forker.py | 5 +++-- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py index 40ad9829cf..082c2b0140 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -36,7 +36,7 @@ class GithubFileEditor: ### Usage example ```python - from haystack.components.actions import GithubFileEditor + from haystack_integrations.components.connectors.github import Command, GithubFileEditor from haystack.utils import Secret # Initialize with default repo and branch diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py index 558c13f9b0..9fa84e804e 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py @@ -19,7 +19,8 @@ class GithubIssueCommenter: ### Usage example ```python - from haystack.components.writers import GithubIssueCommenter + from 
haystack_integrations.components.connectors.github import GithubIssueCommenter + from haystack.utils import Secret commenter = GithubIssueCommenter(github_token=Secret.from_env_var("GITHUB_TOKEN")) result = commenter.run( @@ -27,7 +28,7 @@ class GithubIssueCommenter: comment="Thanks for reporting this issue! We'll look into it." ) - assert result["success"] is True + print(result["success"]) ``` """ diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py index c61d5b0d95..1e40150366 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py @@ -20,15 +20,14 @@ class GithubIssueViewer: ### Usage example ```python - from haystack.components.fetchers import GithubIssueViewer + from haystack_integrations.components.connectors.github import GithubIssueViewer - viewer = GithubIssueViewer(github_token=Secret.from_env_var("GITHUB_TOKEN")) + viewer = GithubIssueViewer() docs = viewer.run( url="https://github.com/owner/repo/issues/123" )["documents"] - assert len(docs) >= 1 # At least the main issue - assert docs[0].meta["type"] == "issue" + print(docs) ``` """ diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py index b73f7db32d..5255a77859 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py @@ -17,7 +17,7 @@ class GithubPRCreator: ### Usage example ```python - from haystack.components.actions import GithubPRCreator + from haystack_integrations.components.connectors.github import GithubPRCreator from haystack.utils 
import Secret pr_creator = GithubPRCreator( diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index 89e6cfcf9b..69af00dd82 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -43,28 +43,25 @@ class GithubRepositoryViewer: ### Usage example ```python - from haystack.components.fetchers import GithubRepositoryViewer - from haystack.utils import Secret + from haystack_integrations.components.connectors.github import GithubRepositoryViewer - # Using token directly - viewer = GithubRepositoryViewer(github_token=Secret.from_token("your_token")) - - # Using environment variable - viewer = GithubRepositoryViewer(github_token=Secret.from_env_var("GITHUB_TOKEN")) + viewer = GithubRepositoryViewer() # List directory contents - returns multiple documents result = viewer.run( repo="owner/repository", path="docs/", - ref="main" + branch="main" ) + print(result) # Get specific file - returns single document result = viewer.run( repo="owner/repository", path="README.md", - ref="main" + branch="main" ) + print(result) ``` """ @@ -203,7 +200,7 @@ def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = Non :param repo: Repository in format "owner/repo" :param path: Path within repository (default: root) - :param ref: Git reference (branch, tag, commit) to use + :param branch: Git reference (branch, tag, commit) to use :return: Dictionary containing list of documents """ if repo is None: diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py index a15122f429..c0e685775e 100644 --- 
a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py @@ -18,17 +18,18 @@ class GithubRepoForker: ### Usage example ```python - from haystack.components.actions import GithubRepoForker + from haystack_integrations.components.connectors.github import GithubRepoForker from haystack.utils import Secret # Using direct token with auto-sync and branch creation forker = GithubRepoForker( - github_token=Secret.from_token("your_token"), + github_token=Secret.from_env_var("GITHUB_TOKEN"), auto_sync=True, create_branch=True ) result = forker.run(url="https://github.com/owner/repo/issues/123") + print(result) # Will create or sync fork and create branch "fix-123" ``` """ From e5111ed4156b67d80a742ee3d7993de2ac2fad54 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 30 Apr 2025 12:36:36 +0200 Subject: [PATCH 21/51] remove empty lines from prompts --- .../src/haystack_integrations/prompts/github/comment_tool.py | 3 +-- .../src/haystack_integrations/prompts/github/context.py | 4 +--- .../haystack_integrations/prompts/github/file_editor_tool.py | 3 +-- .../haystack_integrations/prompts/github/pr_system_prompt.py | 3 +-- .../haystack_integrations/prompts/github/repo_viewer_tool.py | 3 +-- .../src/haystack_integrations/prompts/github/system_prompt.py | 3 +-- 6 files changed, 6 insertions(+), 13 deletions(-) diff --git a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py index 015bb90b72..b4cf359845 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py @@ -1,5 +1,4 @@ -comment_prompt = """ -Haystack-Agent uses this tool to post a comment to a Github-issue discussion. 
+comment_prompt = """Haystack-Agent uses this tool to post a comment to a Github-issue discussion. Pass a `comment` string to post a comment. diff --git a/integrations/github/src/haystack_integrations/prompts/github/context.py b/integrations/github/src/haystack_integrations/prompts/github/context.py index 201477cc13..30b8c2a2ea 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/context.py +++ b/integrations/github/src/haystack_integrations/prompts/github/context.py @@ -1,6 +1,4 @@ -haystack_context_prompt = """ - -Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related +haystack_context_prompt = """Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related questions. The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. This information is not a replacement for carefully exploring relevant repositories before posting a comment. diff --git a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py index ba8c88f167..493e3979d2 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py @@ -1,5 +1,4 @@ -file_editor_prompt = """ -Use the file editor to edit an existing file in the repository. +file_editor_prompt = """Use the file editor to edit an existing file in the repository. 
You must provide a 'command' for the action that you want to perform: - edit diff --git a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py index a99c8ae100..59c9214357 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py @@ -1,5 +1,4 @@ -system_prompt = """ -The assistant is Haystack-Agent, created by deepset. +system_prompt = """The assistant is Haystack-Agent, created by deepset. Haystack-Agent creates Pull Requests that resolve GitHub issues. Haystack-Agent receives a GitHub issue and all current comments. diff --git a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py index cfeae5347c..fb3d951274 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py @@ -1,5 +1,4 @@ -repo_viewer_prompt = """ -Haystack-Agent uses this tool to browse GitHub repositories. +repo_viewer_prompt = """Haystack-Agent uses this tool to browse GitHub repositories. Haystack-Agent can view directories and files with this tool. diff --git a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py index 14b612a08e..be521309d0 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py @@ -1,5 +1,4 @@ -issue_prompt = """ -The assistant is Haystack-Agent, created by deepset. +issue_prompt = """The assistant is Haystack-Agent, created by deepset. 
Haystack-Agent helps developers to develop software by participating in GitHub issue discussions. Haystack-Agent receives a GitHub issue and all current comments. From dc0891665d97805efe4a9c86279c14fccfcb3cb7 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 30 Apr 2025 14:13:48 +0200 Subject: [PATCH 22/51] GitHub capitalization --- .../components/connectors/github/__init__.py | 24 +++++------ .../connectors/github/file_editor.py | 10 ++--- .../connectors/github/issue_commenter.py | 10 ++--- .../connectors/github/issue_viewer.py | 10 ++--- .../connectors/github/pr_creator.py | 10 ++--- .../connectors/github/repo_viewer.py | 10 ++--- .../connectors/github/repository_forker.py | 10 ++--- .../prompts/github/comment_tool.py | 2 +- .../prompts/github/system_prompt.py | 2 +- integrations/github/tests/test_file_editor.py | 40 +++++++++---------- .../github/tests/test_issue_commenter.py | 26 ++++++------ .../github/tests/test_issue_viewer.py | 24 +++++------ integrations/github/tests/test_pr_creator.py | 26 ++++++------ integrations/github/tests/test_repo_viewer.py | 32 +++++++-------- .../github/tests/test_repository_forker.py | 28 ++++++------- 15 files changed, 132 insertions(+), 132 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py index 86ec5c8853..08c16559dd 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py @@ -2,19 +2,19 @@ # # SPDX-License-Identifier: Apache-2.0 -from .file_editor import Command, GithubFileEditor -from .issue_commenter import GithubIssueCommenter -from .issue_viewer import GithubIssueViewer -from .pr_creator import GithubPRCreator -from .repo_viewer import GithubRepositoryViewer -from .repository_forker import GithubRepoForker +from .file_editor import 
Command, GitHubFileEditor +from .issue_commenter import GitHubIssueCommenter +from .issue_viewer import GitHubIssueViewer +from .pr_creator import GitHubPRCreator +from .repo_viewer import GitHubRepositoryViewer +from .repository_forker import GitHubRepoForker __all__ = [ "Command", - "GithubFileEditor", - "GithubIssueCommenter", - "GithubIssueViewer", - "GithubPRCreator", - "GithubRepoForker", - "GithubRepositoryViewer", + "GitHubFileEditor", + "GitHubIssueCommenter", + "GitHubIssueViewer", + "GitHubPRCreator", + "GitHubRepoForker", + "GitHubRepositoryViewer", ] diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py index 082c2b0140..3de935ddfa 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -27,7 +27,7 @@ class Command(str, Enum): @component -class GithubFileEditor: +class GitHubFileEditor: """ A Haystack component for editing files in GitHub repositories. 
@@ -36,11 +36,11 @@ class GithubFileEditor: ### Usage example ```python - from haystack_integrations.components.connectors.github import Command, GithubFileEditor + from haystack_integrations.components.connectors.github import Command, GitHubFileEditor from haystack.utils import Secret # Initialize with default repo and branch - editor = GithubFileEditor( + editor = GitHubFileEditor( github_token=Secret.from_env_var("GITHUB_TOKEN"), repo="owner/repo", branch="main" @@ -99,7 +99,7 @@ def __init__( self.headers = { "Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {self.github_token.resolve_value()}", - "User-Agent": "Haystack/GithubFileEditor", + "User-Agent": "Haystack/GitHubFileEditor", } def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]: @@ -280,7 +280,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubFileEditor": + def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditor": """Deserialize the component from a dictionary.""" init_params = data["init_parameters"] deserialize_secrets_inplace(init_params, keys=["github_token"]) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py index 9fa84e804e..feb54cc800 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py @@ -10,7 +10,7 @@ @component -class GithubIssueCommenter: +class GitHubIssueCommenter: """ Posts comments to GitHub issues. 
@@ -19,10 +19,10 @@ class GithubIssueCommenter: ### Usage example ```python - from haystack_integrations.components.connectors.github import GithubIssueCommenter + from haystack_integrations.components.connectors.github import GitHubIssueCommenter from haystack.utils import Secret - commenter = GithubIssueCommenter(github_token=Secret.from_env_var("GITHUB_TOKEN")) + commenter = GitHubIssueCommenter(github_token=Secret.from_env_var("GITHUB_TOKEN")) result = commenter.run( url="https://github.com/owner/repo/issues/123", comment="Thanks for reporting this issue! We'll look into it." @@ -52,7 +52,7 @@ def __init__( # Set base headers during initialization self.headers = { "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubIssueCommenter", + "User-Agent": "Haystack/GitHubIssueCommenter", } def _get_request_headers(self) -> dict: @@ -123,7 +123,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubIssueCommenter": + def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenter": """ Deserialize the component from a dictionary. diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py index 1e40150366..198cec7cee 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py @@ -10,7 +10,7 @@ @component -class GithubIssueViewer: +class GitHubIssueViewer: """ Fetches and parses GitHub issues into Haystack documents. 
@@ -20,9 +20,9 @@ class GithubIssueViewer: ### Usage example ```python - from haystack_integrations.components.connectors.github import GithubIssueViewer + from haystack_integrations.components.connectors.github import GitHubIssueViewer - viewer = GithubIssueViewer() + viewer = GitHubIssueViewer() docs = viewer.run( url="https://github.com/owner/repo/issues/123" )["documents"] @@ -51,7 +51,7 @@ def __init__( # Only set the basic headers during initialization self.headers = { "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubIssueViewer", + "User-Agent": "Haystack/GitHubIssueViewer", } def _get_request_headers(self) -> dict: @@ -162,7 +162,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubIssueViewer": + def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewer": """ Deserialize the component from a dictionary. diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py index 5255a77859..b0c740b851 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py @@ -9,7 +9,7 @@ @component -class GithubPRCreator: +class GitHubPRCreator: """ A Haystack component for creating pull requests from a fork back to the original repository. 
@@ -17,10 +17,10 @@ class GithubPRCreator: ### Usage example ```python - from haystack_integrations.components.connectors.github import GithubPRCreator + from haystack_integrations.components.connectors.github import GitHubPRCreator from haystack.utils import Secret - pr_creator = GithubPRCreator( + pr_creator = GitHubPRCreator( github_token=Secret.from_env_var("GITHUB_TOKEN") # Token from the fork owner ) @@ -58,7 +58,7 @@ def _get_headers(self) -> Dict[str, str]: return { "Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {self.github_token.resolve_value()}", - "User-Agent": "Haystack/GithubPRCreator", + "User-Agent": "Haystack/GitHubPRCreator", } def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: @@ -263,7 +263,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubPRCreator": + def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreator": """Deserialize the component from a dictionary.""" init_params = data["init_parameters"] deserialize_secrets_inplace(init_params, keys=["github_token"]) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index 69af00dd82..aef3e62fa7 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -22,7 +22,7 @@ class GitHubItem: @component -class GithubRepositoryViewer: +class GitHubRepositoryViewer: """ Navigates and fetches content from GitHub repositories. 
@@ -43,9 +43,9 @@ class GithubRepositoryViewer: ### Usage example ```python - from haystack_integrations.components.connectors.github import GithubRepositoryViewer + from haystack_integrations.components.connectors.github import GitHubRepositoryViewer - viewer = GithubRepositoryViewer() + viewer = GitHubRepositoryViewer() # List directory contents - returns multiple documents result = viewer.run( @@ -94,7 +94,7 @@ def __init__( self.headers = { "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubRepositoryViewer", + "User-Agent": "Haystack/GitHubRepositoryViewer", } def to_dict(self) -> Dict[str, Any]: @@ -113,7 +113,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubRepositoryViewer": + def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepositoryViewer": """ Deserialize the component from a dictionary. diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py index c0e685775e..f423f0ce78 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py @@ -9,7 +9,7 @@ @component -class GithubRepoForker: +class GitHubRepoForker: """ Forks a GitHub repository from an issue URL. 
@@ -18,11 +18,11 @@ class GithubRepoForker: ### Usage example ```python - from haystack_integrations.components.connectors.github import GithubRepoForker + from haystack_integrations.components.connectors.github import GitHubRepoForker from haystack.utils import Secret # Using direct token with auto-sync and branch creation - forker = GithubRepoForker( + forker = GitHubRepoForker( github_token=Secret.from_env_var("GITHUB_TOKEN"), auto_sync=True, create_branch=True @@ -67,7 +67,7 @@ def __init__( self.auto_sync = auto_sync self.create_branch = create_branch - self.headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GithubRepoForker"} + self.headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GitHubRepoForker"} def _parse_github_url(self, url: str) -> tuple[str, str, str]: """ @@ -226,7 +226,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GithubRepoForker": + def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForker": """ Deserialize the component from a dictionary. diff --git a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py index b4cf359845..d23bf0b753 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py @@ -1,4 +1,4 @@ -comment_prompt = """Haystack-Agent uses this tool to post a comment to a Github-issue discussion. +comment_prompt = """Haystack-Agent uses this tool to post a comment to a GitHub-issue discussion. Pass a `comment` string to post a comment. 
diff --git a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py index be521309d0..6993000f4d 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py @@ -56,5 +56,5 @@ - If you find yourself running out of context space during exploration, say: "I need to continue exploring the codebase before providing a complete response." Then continue exploration in the next interaction. -Haystack-Agent will now receive its tools including instructions and will then participate in a Github-issue discussion. +Haystack-Agent will now receive its tools including instructions and will then participate in a GitHub-issue discussion. """ diff --git a/integrations/github/tests/test_file_editor.py b/integrations/github/tests/test_file_editor.py index 51ed2167de..a52d879d14 100644 --- a/integrations/github/tests/test_file_editor.py +++ b/integrations/github/tests/test_file_editor.py @@ -7,14 +7,14 @@ import requests from haystack.utils import Secret -from haystack_integrations.components.connectors.github.file_editor import Command, GithubFileEditor +from haystack_integrations.components.connectors.github.file_editor import Command, GitHubFileEditor -class TestGithubFileEditor: +class TestGitHubFileEditor: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - editor = GithubFileEditor() + editor = GitHubFileEditor() assert editor.github_token is not None assert editor.github_token.resolve_value() == "test-token" assert editor.default_repo is None @@ -23,26 +23,26 @@ def test_init_default(self, monkeypatch): def test_init_with_parameters(self): token = Secret.from_token("test-token") - editor = GithubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) + editor = 
GitHubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) assert editor.github_token == token assert editor.default_repo == "owner/repo" assert editor.default_branch == "feature" assert editor.raise_on_failure is False with pytest.raises(TypeError): - GithubFileEditor(github_token="not_a_secret") + GitHubFileEditor(github_token="not_a_secret") def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") - editor = GithubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) + editor = GitHubFileEditor(github_token=token, repo="owner/repo", branch="feature", raise_on_failure=False) data = editor.to_dict() assert data == { - "type": "haystack_integrations.components.connectors.github.file_editor.GithubFileEditor", + "type": "haystack_integrations.components.connectors.github.file_editor.GitHubFileEditor", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "repo": "owner/repo", @@ -54,7 +54,7 @@ def test_to_dict(self, monkeypatch): def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") data = { - "type": "haystack_integrations.components.connectors.github.file_editor.GithubFileEditor", + "type": "haystack_integrations.components.connectors.github.file_editor.GitHubFileEditor", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "repo": "owner/repo", @@ -63,7 +63,7 @@ def test_from_dict(self, monkeypatch): }, } - editor = GithubFileEditor.from_dict(data) + editor = GitHubFileEditor.from_dict(data) assert editor.github_token == Secret.from_env_var("ENV_VAR") assert editor.default_repo == "owner/repo" @@ -82,7 +82,7 @@ def test_run_edit(self, mock_put, mock_get, monkeypatch): mock_get.return_value.raise_for_status.return_value = None mock_put.return_value.raise_for_status.return_value = None - editor = 
GithubFileEditor() + editor = GitHubFileEditor() result = editor.run( command=Command.EDIT, @@ -97,7 +97,7 @@ def test_run_edit(self, mock_put, mock_get, monkeypatch): "https://api.github.com/repos/owner/repo/contents/test.txt", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubFileEditor", + "User-Agent": "Haystack/GitHubFileEditor", "Authorization": "Bearer test-token", }, params={"ref": "main"}, @@ -146,7 +146,7 @@ def get_side_effect(url, **_): mock_patch.return_value.raise_for_status.return_value = None - editor = GithubFileEditor() + editor = GitHubFileEditor() result = editor.run( command=Command.UNDO, payload={"message": "Undo last change"}, repo="owner/repo", branch="main" @@ -159,7 +159,7 @@ def get_side_effect(url, **_): "https://api.github.com/repos/owner/repo/git/refs/heads/main", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubFileEditor", + "User-Agent": "Haystack/GitHubFileEditor", "Authorization": "Bearer test-token", }, json={"sha": "def456", "force": True}, @@ -172,7 +172,7 @@ def test_run_create(self, mock_put, monkeypatch): mock_put.return_value.raise_for_status.return_value = None - editor = GithubFileEditor() + editor = GitHubFileEditor() result = editor.run( command=Command.CREATE, @@ -187,7 +187,7 @@ def test_run_create(self, mock_put, monkeypatch): "https://api.github.com/repos/owner/repo/contents/new.txt", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubFileEditor", + "User-Agent": "Haystack/GitHubFileEditor", "Authorization": "Bearer test-token", }, json={ @@ -211,7 +211,7 @@ def test_run_delete(self, mock_delete, mock_get, monkeypatch): mock_delete.return_value.raise_for_status.return_value = None - editor = GithubFileEditor() + editor = GitHubFileEditor() result = editor.run( command=Command.DELETE, @@ -226,7 +226,7 @@ def test_run_delete(self, mock_delete, mock_get, monkeypatch): 
"https://api.github.com/repos/owner/repo/contents/test.txt", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubFileEditor", + "User-Agent": "Haystack/GitHubFileEditor", "Authorization": "Bearer test-token", }, params={"ref": "main"}, @@ -237,7 +237,7 @@ def test_run_delete(self, mock_delete, mock_get, monkeypatch): "https://api.github.com/repos/owner/repo/contents/test.txt", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubFileEditor", + "User-Agent": "Haystack/GitHubFileEditor", "Authorization": "Bearer test-token", }, json={"message": "Delete file", "sha": "abc123", "branch": "main"}, @@ -250,7 +250,7 @@ def test_run_error_handling(self, mock_get, monkeypatch): mock_get.side_effect = requests.RequestException("API Error") - editor = GithubFileEditor(raise_on_failure=False) + editor = GitHubFileEditor(raise_on_failure=False) result = editor.run( command=Command.EDIT, @@ -261,7 +261,7 @@ def test_run_error_handling(self, mock_get, monkeypatch): assert "Error: API Error" in result["result"] - editor = GithubFileEditor(raise_on_failure=True) + editor = GitHubFileEditor(raise_on_failure=True) with pytest.raises(requests.RequestException): editor.run( command=Command.EDIT, diff --git a/integrations/github/tests/test_issue_commenter.py b/integrations/github/tests/test_issue_commenter.py index d71440c3fd..b187218791 100644 --- a/integrations/github/tests/test_issue_commenter.py +++ b/integrations/github/tests/test_issue_commenter.py @@ -7,14 +7,14 @@ import requests from haystack.utils import Secret -from haystack_integrations.components.connectors.github.issue_commenter import GithubIssueCommenter +from haystack_integrations.components.connectors.github.issue_commenter import GitHubIssueCommenter -class TestGithubIssueCommenter: +class TestGitHubIssueCommenter: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - commenter = GithubIssueCommenter() + commenter 
= GitHubIssueCommenter() assert commenter.github_token is not None assert commenter.github_token.resolve_value() == "test-token" assert commenter.raise_on_failure is True @@ -22,7 +22,7 @@ def test_init_default(self, monkeypatch): def test_init_with_parameters(self): token = Secret.from_token("test-token") - commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) + commenter = GitHubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) assert commenter.github_token == token assert commenter.raise_on_failure is False assert commenter.retry_attempts == 3 @@ -32,12 +32,12 @@ def test_to_dict(self, monkeypatch): token = Secret.from_env_var("ENV_VAR") - commenter = GithubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) + commenter = GitHubIssueCommenter(github_token=token, raise_on_failure=False, retry_attempts=3) data = commenter.to_dict() assert data == { - "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", + "type": "haystack_integrations.components.connectors.github.issue_commenter.GitHubIssueCommenter", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -49,7 +49,7 @@ def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") data = { - "type": "haystack_integrations.components.connectors.github.issue_commenter.GithubIssueCommenter", + "type": "haystack_integrations.components.connectors.github.issue_commenter.GitHubIssueCommenter", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -57,7 +57,7 @@ def test_from_dict(self, monkeypatch): }, } - commenter = GithubIssueCommenter.from_dict(data) + commenter = GitHubIssueCommenter.from_dict(data) assert commenter.github_token == Secret.from_env_var("ENV_VAR") assert commenter.raise_on_failure is False @@ 
-69,7 +69,7 @@ def test_run(self, mock_post, monkeypatch): mock_post.return_value.raise_for_status.return_value = None - commenter = GithubIssueCommenter() + commenter = GitHubIssueCommenter() result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") @@ -79,7 +79,7 @@ def test_run(self, mock_post, monkeypatch): "https://api.github.com/repos/owner/repo/issues/123/comments", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubIssueCommenter", + "User-Agent": "Haystack/GitHubIssueCommenter", "Authorization": "Bearer test-token", }, json={"body": "Test comment"}, @@ -92,20 +92,20 @@ def test_run_error_handling(self, mock_post, monkeypatch): mock_post.side_effect = requests.RequestException("API Error") - commenter = GithubIssueCommenter(raise_on_failure=False) + commenter = GitHubIssueCommenter(raise_on_failure=False) result = commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") assert result["success"] is False - commenter = GithubIssueCommenter(raise_on_failure=True) + commenter = GitHubIssueCommenter(raise_on_failure=True) with pytest.raises(requests.RequestException): commenter.run(url="https://github.com/owner/repo/issues/123", comment="Test comment") def test_parse_github_url(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - commenter = GithubIssueCommenter() + commenter = GitHubIssueCommenter() owner, repo, issue_number = commenter._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" diff --git a/integrations/github/tests/test_issue_viewer.py b/integrations/github/tests/test_issue_viewer.py index d7711c5446..9e66bfbc6c 100644 --- a/integrations/github/tests/test_issue_viewer.py +++ b/integrations/github/tests/test_issue_viewer.py @@ -7,14 +7,14 @@ import requests from haystack.utils import Secret -from haystack_integrations.components.connectors.github.issue_viewer import GithubIssueViewer +from 
haystack_integrations.components.connectors.github.issue_viewer import GitHubIssueViewer -class TestGithubIssueViewer: +class TestGitHubIssueViewer: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - viewer = GithubIssueViewer() + viewer = GitHubIssueViewer() assert viewer.github_token is None assert viewer.raise_on_failure is True assert viewer.retry_attempts == 2 @@ -23,7 +23,7 @@ def test_init_with_parameters(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") token = Secret.from_env_var("GITHUB_TOKEN") - viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) + viewer = GitHubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) assert viewer.github_token == token assert viewer.raise_on_failure is False assert viewer.retry_attempts == 3 @@ -33,12 +33,12 @@ def test_to_dict(self, monkeypatch): token = Secret.from_env_var("ENV_VAR") - viewer = GithubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) + viewer = GitHubIssueViewer(github_token=token, raise_on_failure=False, retry_attempts=3) data = viewer.to_dict() assert data == { - "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", + "type": "haystack_integrations.components.connectors.github.issue_viewer.GitHubIssueViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -50,7 +50,7 @@ def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") data = { - "type": "haystack_integrations.components.connectors.github.issue_viewer.GithubIssueViewer", + "type": "haystack_integrations.components.connectors.github.issue_viewer.GitHubIssueViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -58,7 +58,7 @@ def test_from_dict(self, monkeypatch): }, } - viewer = 
GithubIssueViewer.from_dict(data) + viewer = GitHubIssueViewer.from_dict(data) assert viewer.github_token == Secret.from_env_var("ENV_VAR") assert viewer.raise_on_failure is False @@ -109,7 +109,7 @@ def test_run(self, mock_get, monkeypatch): ), ] - viewer = GithubIssueViewer() + viewer = GitHubIssueViewer() result = viewer.run(url="https://github.com/owner/repo/issues/123") @@ -126,7 +126,7 @@ def test_run_error_handling(self, mock_get, monkeypatch): mock_get.side_effect = requests.RequestException("API Error") - viewer = GithubIssueViewer(raise_on_failure=False) + viewer = GitHubIssueViewer(raise_on_failure=False) result = viewer.run(url="https://github.com/owner/repo/issues/123") @@ -134,14 +134,14 @@ def test_run_error_handling(self, mock_get, monkeypatch): assert result["documents"][0].meta["type"] == "error" assert result["documents"][0].meta["error"] is True - viewer = GithubIssueViewer(raise_on_failure=True) + viewer = GitHubIssueViewer(raise_on_failure=True) with pytest.raises(requests.RequestException): viewer.run(url="https://github.com/owner/repo/issues/123") def test_parse_github_url(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - viewer = GithubIssueViewer() + viewer = GitHubIssueViewer() owner, repo, issue_number = viewer._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" diff --git a/integrations/github/tests/test_pr_creator.py b/integrations/github/tests/test_pr_creator.py index 085c2328d9..c27d19a942 100644 --- a/integrations/github/tests/test_pr_creator.py +++ b/integrations/github/tests/test_pr_creator.py @@ -7,38 +7,38 @@ import requests from haystack.utils import Secret -from haystack_integrations.components.connectors.github.pr_creator import GithubPRCreator +from haystack_integrations.components.connectors.github.pr_creator import GitHubPRCreator -class TestGithubPRCreator: +class TestGitHubPRCreator: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", 
"test-token") - pr_creator = GithubPRCreator() + pr_creator = GitHubPRCreator() assert pr_creator.github_token is not None assert pr_creator.github_token.resolve_value() == "test-token" assert pr_creator.raise_on_failure is True def test_init_with_parameters(self): token = Secret.from_token("test-token") - pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + pr_creator = GitHubPRCreator(github_token=token, raise_on_failure=False) assert pr_creator.github_token == token assert pr_creator.raise_on_failure is False with pytest.raises(TypeError): - GithubPRCreator(github_token="not_a_secret") + GitHubPRCreator(github_token="not_a_secret") def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") - pr_creator = GithubPRCreator(github_token=token, raise_on_failure=False) + pr_creator = GitHubPRCreator(github_token=token, raise_on_failure=False) data = pr_creator.to_dict() assert data == { - "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", + "type": "haystack_integrations.components.connectors.github.pr_creator.GitHubPRCreator", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -49,14 +49,14 @@ def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") data = { - "type": "haystack_integrations.components.connectors.github.pr_creator.GithubPRCreator", + "type": "haystack_integrations.components.connectors.github.pr_creator.GitHubPRCreator", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, }, } - pr_creator = GithubPRCreator.from_dict(data) + pr_creator = GitHubPRCreator.from_dict(data) assert pr_creator.github_token == Secret.from_env_var("ENV_VAR") assert pr_creator.raise_on_failure is False @@ -72,7 +72,7 @@ def test_run(self, mock_post, mock_get, monkeypatch): 
mock_post.return_value.json.return_value = {"number": 123} mock_post.return_value.raise_for_status.return_value = None - pr_creator = GithubPRCreator() + pr_creator = GitHubPRCreator() with patch.object(pr_creator, "_check_fork_exists", return_value=True): result = pr_creator.run( @@ -91,7 +91,7 @@ def test_run(self, mock_post, mock_get, monkeypatch): "https://api.github.com/repos/owner/repo/pulls", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubPRCreator", + "User-Agent": "Haystack/GitHubPRCreator", "Authorization": "Bearer test-token", }, json={ @@ -112,7 +112,7 @@ def test_run_error_handling(self, _, mock_get, monkeypatch): mock_get.side_effect = requests.RequestException("API Error") - pr_creator = GithubPRCreator(raise_on_failure=False) + pr_creator = GitHubPRCreator(raise_on_failure=False) with patch.object(pr_creator, "_check_fork_exists", return_value=True): result = pr_creator.run( @@ -124,7 +124,7 @@ def test_run_error_handling(self, _, mock_get, monkeypatch): assert "Error" in result["result"] - pr_creator = GithubPRCreator(raise_on_failure=True) + pr_creator = GitHubPRCreator(raise_on_failure=True) with pytest.raises(requests.RequestException): pr_creator.run( issue_url="https://github.com/owner/repo/issues/456", diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index be1f0b7882..36a196fe70 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -7,14 +7,14 @@ import requests from haystack.utils import Secret -from haystack_integrations.components.connectors.github.repo_viewer import GithubRepositoryViewer +from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepositoryViewer -class TestGithubRepositoryViewer: +class TestGitHubRepositoryViewer: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - viewer = GithubRepositoryViewer() + 
viewer = GitHubRepositoryViewer() assert viewer.github_token is None assert viewer.raise_on_failure is True assert viewer.max_file_size == 1_000_000 @@ -23,7 +23,7 @@ def test_init_default(self, monkeypatch): def test_init_with_parameters(self): token = Secret.from_token("test-token") - viewer = GithubRepositoryViewer( + viewer = GitHubRepositoryViewer( github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" ) assert viewer.github_token == token @@ -33,21 +33,21 @@ def test_init_with_parameters(self): assert viewer.branch == "main" with pytest.raises(TypeError): - GithubRepositoryViewer(github_token="not_a_secret") + GitHubRepositoryViewer(github_token="not_a_secret") def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") - viewer = GithubRepositoryViewer( + viewer = GitHubRepositoryViewer( github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" ) data = viewer.to_dict() assert data == { - "type": "haystack_integrations.components.connectors.github.repo_viewer.GithubRepositoryViewer", + "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepositoryViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -61,7 +61,7 @@ def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") data = { - "type": "haystack_integrations.components.connectors.github.repo_viewer.GithubRepositoryViewer", + "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepositoryViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -71,7 +71,7 @@ def test_from_dict(self, monkeypatch): }, } - viewer = GithubRepositoryViewer.from_dict(data) + viewer = GitHubRepositoryViewer.from_dict(data) assert 
viewer.github_token == Secret.from_env_var("ENV_VAR") assert viewer.raise_on_failure is False @@ -93,7 +93,7 @@ def test_run_file(self, mock_get, monkeypatch): } mock_get.return_value.raise_for_status.return_value = None - viewer = GithubRepositoryViewer() + viewer = GitHubRepositoryViewer() result = viewer.run(repo="owner/repo", path="README.md", branch="main") @@ -106,7 +106,7 @@ def test_run_file(self, mock_get, monkeypatch): "https://api.github.com/repos/owner/repo/contents/README.md?ref=main", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubRepositoryViewer", + "User-Agent": "Haystack/GitHubRepositoryViewer", }, timeout=10, ) @@ -127,7 +127,7 @@ def test_run_directory(self, mock_get, monkeypatch): ] mock_get.return_value.raise_for_status.return_value = None - viewer = GithubRepositoryViewer() + viewer = GitHubRepositoryViewer() result = viewer.run(repo="owner/repo", path="", branch="main") @@ -141,7 +141,7 @@ def test_run_directory(self, mock_get, monkeypatch): "https://api.github.com/repos/owner/repo/contents/?ref=main", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GithubRepositoryViewer", + "User-Agent": "Haystack/GitHubRepositoryViewer", }, timeout=10, ) @@ -152,21 +152,21 @@ def test_run_error_handling(self, mock_get, monkeypatch): mock_get.side_effect = requests.RequestException("API Error") - viewer = GithubRepositoryViewer(raise_on_failure=False) + viewer = GitHubRepositoryViewer(raise_on_failure=False) result = viewer.run(repo="owner/repo", path="README.md", branch="main") assert len(result["documents"]) == 1 assert result["documents"][0].meta["type"] == "error" - viewer = GithubRepositoryViewer(raise_on_failure=True) + viewer = GitHubRepositoryViewer(raise_on_failure=True) with pytest.raises(requests.RequestException): viewer.run(repo="owner/repo", path="README.md", branch="main") def test_parse_repo(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - viewer = 
GithubRepositoryViewer() + viewer = GitHubRepositoryViewer() owner, repo = viewer._parse_repo("owner/repo") assert owner == "owner" diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py index 0e7d7015bd..766d43ab7a 100644 --- a/integrations/github/tests/test_repository_forker.py +++ b/integrations/github/tests/test_repository_forker.py @@ -7,14 +7,14 @@ import requests from haystack.utils import Secret -from haystack_integrations.components.connectors.github.repository_forker import GithubRepoForker +from haystack_integrations.components.connectors.github.repository_forker import GitHubRepoForker -class TestGithubRepoForker: +class TestGitHubRepoForker: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - forker = GithubRepoForker() + forker = GitHubRepoForker() assert forker.github_token is not None assert forker.github_token.resolve_value() == "test-token" assert forker.raise_on_failure is True @@ -26,7 +26,7 @@ def test_init_default(self, monkeypatch): def test_init_with_parameters(self): token = Secret.from_token("test-token") - forker = GithubRepoForker( + forker = GitHubRepoForker( github_token=token, raise_on_failure=False, wait_for_completion=True, @@ -45,14 +45,14 @@ def test_init_with_parameters(self): # Test with invalid token type with pytest.raises(TypeError): - GithubRepoForker(github_token="not_a_secret") + GitHubRepoForker(github_token="not_a_secret") def test_to_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") - forker = GithubRepoForker( + forker = GitHubRepoForker( github_token=token, raise_on_failure=False, wait_for_completion=True, @@ -65,7 +65,7 @@ def test_to_dict(self, monkeypatch): data = forker.to_dict() assert data == { - "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", + "type": 
"haystack_integrations.components.connectors.github.repository_forker.GitHubRepoForker", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -81,7 +81,7 @@ def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") data = { - "type": "haystack_integrations.components.connectors.github.repository_forker.GithubRepoForker", + "type": "haystack_integrations.components.connectors.github.repository_forker.GitHubRepoForker", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -93,7 +93,7 @@ def test_from_dict(self, monkeypatch): }, } - forker = GithubRepoForker.from_dict(data) + forker = GitHubRepoForker.from_dict(data) assert forker.github_token == Secret.from_env_var("ENV_VAR") assert forker.raise_on_failure is False @@ -150,7 +150,7 @@ def post_side_effect(url, **_): mock_post.side_effect = post_side_effect - forker = GithubRepoForker(create_branch=True, auto_sync=False) + forker = GitHubRepoForker(create_branch=True, auto_sync=False) result = forker.run(url="https://github.com/owner/repo/issues/123") @@ -212,7 +212,7 @@ def post_side_effect(url, **_): mock_post.side_effect = post_side_effect - forker = GithubRepoForker(create_branch=True, auto_sync=True) + forker = GitHubRepoForker(create_branch=True, auto_sync=True) result = forker.run(url="https://github.com/owner/repo/issues/123") @@ -238,21 +238,21 @@ def test_run_error_handling(self, _, mock_get, monkeypatch): mock_get.side_effect = requests.RequestException("API Error") - forker = GithubRepoForker(raise_on_failure=False) + forker = GitHubRepoForker(raise_on_failure=False) result = forker.run(url="https://github.com/owner/repo/issues/123") assert result["repo"] == "" assert result["issue_branch"] is None - forker = GithubRepoForker(raise_on_failure=True) + forker = GitHubRepoForker(raise_on_failure=True) with 
pytest.raises(requests.RequestException): forker.run(url="https://github.com/owner/repo/issues/123") def test_parse_github_url(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - forker = GithubRepoForker() + forker = GitHubRepoForker() owner, repo, issue_number = forker._parse_github_url("https://github.com/owner/repo/issues/123") assert owner == "owner" From 380c212f3595e70731c7fed2efde822586db008c Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 30 Apr 2025 14:16:32 +0200 Subject: [PATCH 23/51] add license header --- .../components/connectors/github/__init__.py | 1 - .../components/connectors/github/file_editor.py | 3 +++ .../components/connectors/github/issue_commenter.py | 3 +++ .../components/connectors/github/issue_viewer.py | 3 +++ .../components/connectors/github/pr_creator.py | 3 +++ .../components/connectors/github/repository_forker.py | 3 +++ .../src/haystack_integrations/prompts/github/__init__.py | 1 - .../src/haystack_integrations/prompts/github/comment_tool.py | 3 +++ .../github/src/haystack_integrations/prompts/github/context.py | 3 +++ .../haystack_integrations/prompts/github/file_editor_tool.py | 3 +++ .../haystack_integrations/prompts/github/pr_system_prompt.py | 3 +++ .../haystack_integrations/prompts/github/repo_viewer_tool.py | 3 +++ .../src/haystack_integrations/prompts/github/system_prompt.py | 3 +++ 13 files changed, 33 insertions(+), 2 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py index 08c16559dd..6fe1832023 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 - from .file_editor import Command, GitHubFileEditor from 
.issue_commenter import GitHubIssueCommenter from .issue_viewer import GitHubIssueViewer diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py index 3de935ddfa..0278030151 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 from base64 import b64decode, b64encode from enum import Enum from typing import Any, Dict, Optional, Union diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py index feb54cc800..c130d94495 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 import re from typing import Any, Dict diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py index 198cec7cee..27623665f3 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 import re from typing import Any, Dict, List, Optional diff --git 
a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py index b0c740b851..9de618d7c5 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 import re from typing import Any, Dict, Optional diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py index f423f0ce78..d9a4f70c87 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 import re from typing import Any, Dict, Optional diff --git a/integrations/github/src/haystack_integrations/prompts/github/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py index 966808a066..10be9ce571 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/__init__.py +++ b/integrations/github/src/haystack_integrations/prompts/github/__init__.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 - from .comment_tool import comment_prompt, comment_schema from .file_editor_tool import file_editor_prompt, file_editor_schema from .pr_system_prompt import system_prompt as pr_system_prompt diff --git a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py 
index d23bf0b753..0080606d50 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 comment_prompt = """Haystack-Agent uses this tool to post a comment to a GitHub-issue discussion. diff --git a/integrations/github/src/haystack_integrations/prompts/github/context.py b/integrations/github/src/haystack_integrations/prompts/github/context.py index 30b8c2a2ea..89be2e6010 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/context.py +++ b/integrations/github/src/haystack_integrations/prompts/github/context.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 haystack_context_prompt = """Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related questions. The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. diff --git a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py index 493e3979d2..d3788c57bb 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 file_editor_prompt = """Use the file editor to edit an existing file in the repository. 
You must provide a 'command' for the action that you want to perform: diff --git a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py index 59c9214357..e2d2058b62 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 system_prompt = """The assistant is Haystack-Agent, created by deepset. Haystack-Agent creates Pull Requests that resolve GitHub issues. diff --git a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py index fb3d951274..f19eb0eb63 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 repo_viewer_prompt = """Haystack-Agent uses this tool to browse GitHub repositories. Haystack-Agent can view directories and files with this tool. diff --git a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py index 6993000f4d..b1638633bf 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 issue_prompt = """The assistant is Haystack-Agent, created by deepset. 
Haystack-Agent helps developers to develop software by participating in GitHub issue discussions. From ea95934b7fac9b3e10eccfdc400af2499b23dbed Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 30 Apr 2025 14:19:51 +0200 Subject: [PATCH 24/51] all caps for prompts --- .../prompts/github/__init__.py | 14 +++++++------- .../prompts/github/comment_tool.py | 4 ++-- .../prompts/github/context.py | 2 +- .../prompts/github/file_editor_tool.py | 2 +- .../prompts/github/pr_system_prompt.py | 2 +- .../prompts/github/repo_viewer_tool.py | 2 +- .../prompts/github/system_prompt.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/integrations/github/src/haystack_integrations/prompts/github/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py index 10be9ce571..188c700865 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/__init__.py +++ b/integrations/github/src/haystack_integrations/prompts/github/__init__.py @@ -2,18 +2,18 @@ # # SPDX-License-Identifier: Apache-2.0 from .comment_tool import comment_prompt, comment_schema -from .file_editor_tool import file_editor_prompt, file_editor_schema -from .pr_system_prompt import system_prompt as pr_system_prompt -from .repo_viewer_tool import repo_viewer_prompt, repo_viewer_schema -from .system_prompt import issue_prompt +from .file_editor_tool import FILE_EDITOR_PROMPT, file_editor_schema +from .pr_system_prompt import SYSTEM_PROMPT as pr_system_prompt +from .repo_viewer_tool import REPO_VIEWER_PROMPT, repo_viewer_schema +from .system_prompt import ISSUE_PROMPT __all__ = [ "comment_prompt", "comment_schema", - "file_editor_prompt", + "FILE_EDITOR_PROMPT", "file_editor_schema", - "issue_prompt", + "ISSUE_PROMPT", "pr_system_prompt", - "repo_viewer_prompt", + "REPO_VIEWER_PROMPT", "repo_viewer_schema", ] diff --git a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py 
b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py index 0080606d50..7f4e2002d6 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -comment_prompt = """Haystack-Agent uses this tool to post a comment to a GitHub-issue discussion. +COMMENT_PROMPT = """Haystack-Agent uses this tool to post a comment to a GitHub-issue discussion. Pass a `comment` string to post a comment. @@ -12,7 +12,7 @@ Haystack-Agent always passes the contents of the comment to the "comment" parameter when calling this tool. """ -comment_schema = { +COMMENT_SCHEMA = { "properties": { "comment": {"type": "string", "description": "The contents of the comment that you want to create."} }, diff --git a/integrations/github/src/haystack_integrations/prompts/github/context.py b/integrations/github/src/haystack_integrations/prompts/github/context.py index 89be2e6010..498c741350 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/context.py +++ b/integrations/github/src/haystack_integrations/prompts/github/context.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -haystack_context_prompt = """Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related +CONTEXT_PROMPT = """Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related questions. The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. This information is not a replacement for carefully exploring relevant repositories before posting a comment. 
diff --git a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py index d3788c57bb..46cc3ec277 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -file_editor_prompt = """Use the file editor to edit an existing file in the repository. +FILE_EDITOR_PROMPT = """Use the file editor to edit an existing file in the repository. You must provide a 'command' for the action that you want to perform: - edit diff --git a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py index e2d2058b62..d1429e1bca 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -system_prompt = """The assistant is Haystack-Agent, created by deepset. +SYSTEM_PROMPT = """The assistant is Haystack-Agent, created by deepset. Haystack-Agent creates Pull Requests that resolve GitHub issues. Haystack-Agent receives a GitHub issue and all current comments. 
diff --git a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py index f19eb0eb63..92d9567157 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -repo_viewer_prompt = """Haystack-Agent uses this tool to browse GitHub repositories. +REPO_VIEWER_PROMPT = """Haystack-Agent uses this tool to browse GitHub repositories. Haystack-Agent can view directories and files with this tool. diff --git a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py index b1638633bf..95cda82e3d 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -issue_prompt = """The assistant is Haystack-Agent, created by deepset. +ISSUE_PROMPT = """The assistant is Haystack-Agent, created by deepset. Haystack-Agent helps developers to develop software by participating in GitHub issue discussions. Haystack-Agent receives a GitHub issue and all current comments. 
From 22758e8d25275466f7475c1114618bcdbe084818 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 2 May 2025 10:36:58 +0200 Subject: [PATCH 25/51] add GitHubFileEditorTool --- .../connectors/github/repo_viewer.py | 3 + .../prompts/github/__init__.py | 18 ++--- .../prompts/github/context.py | 4 +- .../prompts/github/file_editor_tool.py | 2 +- .../prompts/github/pr_system_prompt.py | 2 +- .../prompts/github/repo_viewer_tool.py | 2 +- .../tools/github/__init__.py | 8 ++ .../tools/github/file_editor_tool.py | 79 +++++++++++++++++++ .../github/tests/test_file_editor_tool.py | 57 +++++++++++++ 9 files changed, 161 insertions(+), 14 deletions(-) create mode 100644 integrations/github/src/haystack_integrations/tools/github/__init__.py create mode 100644 integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py create mode 100644 integrations/github/tests/test_file_editor_tool.py diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index aef3e62fa7..f0be799090 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 import base64 from dataclasses import dataclass from typing import Any, Dict, List, Optional diff --git a/integrations/github/src/haystack_integrations/prompts/github/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py index 188c700865..ecef89f8ae 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/__init__.py +++ b/integrations/github/src/haystack_integrations/prompts/github/__init__.py @@ -1,19 +1,19 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 
-from .comment_tool import comment_prompt, comment_schema -from .file_editor_tool import FILE_EDITOR_PROMPT, file_editor_schema -from .pr_system_prompt import SYSTEM_PROMPT as pr_system_prompt -from .repo_viewer_tool import REPO_VIEWER_PROMPT, repo_viewer_schema +from .comment_tool import COMMENT_PROMPT, COMMENT_SCHEMA +from .file_editor_tool import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from .pr_system_prompt import PR_SYSTEM_PROMPT +from .repo_viewer_tool import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA from .system_prompt import ISSUE_PROMPT __all__ = [ - "comment_prompt", - "comment_schema", + "COMMENT_PROMPT", + "COMMENT_SCHEMA", "FILE_EDITOR_PROMPT", - "file_editor_schema", + "FILE_EDITOR_SCHEMA", "ISSUE_PROMPT", - "pr_system_prompt", + "PR_SYSTEM_PROMPT", "REPO_VIEWER_PROMPT", - "repo_viewer_schema", + "REPO_VIEWER_SCHEMA", ] diff --git a/integrations/github/src/haystack_integrations/prompts/github/context.py b/integrations/github/src/haystack_integrations/prompts/github/context.py index 498c741350..a920a0875b 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/context.py +++ b/integrations/github/src/haystack_integrations/prompts/github/context.py @@ -1,8 +1,8 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -CONTEXT_PROMPT = """Haystack-Agent was specifically designed to help developers with the Haystack-framework and any Haystack related -questions. +CONTEXT_PROMPT = """Haystack-Agent was specifically designed to help developers with the Haystack-framework + and any Haystack related questions. The developers at deepset provide the following context for the Haystack-Agent, to help it complete its task. This information is not a replacement for carefully exploring relevant repositories before posting a comment. 
diff --git a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py index 46cc3ec277..b87e903167 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py @@ -98,7 +98,7 @@ } """ -file_editor_schema = { +FILE_EDITOR_SCHEMA = { "type": "object", "properties": { "command": { diff --git a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py index d1429e1bca..0acfba3a72 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -SYSTEM_PROMPT = """The assistant is Haystack-Agent, created by deepset. +PR_SYSTEM_PROMPT = """The assistant is Haystack-Agent, created by deepset. Haystack-Agent creates Pull Requests that resolve GitHub issues. Haystack-Agent receives a GitHub issue and all current comments. 
diff --git a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py index 92d9567157..5fbe82ca16 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py @@ -64,7 +64,7 @@ """ -repo_viewer_schema = { +REPO_VIEWER_SCHEMA = { "properties": { "repo": {"type": "string", "description": "The owner/repository_name that you want to view."}, "path": { diff --git a/integrations/github/src/haystack_integrations/tools/github/__init__.py b/integrations/github/src/haystack_integrations/tools/github/__init__.py new file mode 100644 index 0000000000..81747ff39c --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .file_editor_tool import GitHubFileEditorTool + +__all__ = [ + "GitHubFileEditorTool", +] diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py new file mode 100644 index 0000000000..df728c1a71 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -0,0 +1,79 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Dict, Optional + +from haystack import default_from_dict, default_to_dict +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.file_editor import GitHubFileEditor +from haystack_integrations.prompts.github.file_editor_tool import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA + + +class GitHubFileEditorTool(ComponentTool): + """ + A Haystack tool for 
editing files in GitHub repositories. + """ + + def __init__( + self, + name: Optional[str] = "file_editor", + description: Optional[str] = FILE_EDITOR_PROMPT, + parameters: Optional[Dict[str, Any]] = FILE_EDITOR_SCHEMA, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + repo: Optional[str] = None, + branch: str = "main", + raise_on_failure: bool = True, + ): + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.repo = repo + self.branch = branch + self.raise_on_failure = raise_on_failure + + file_editor = GitHubFileEditor( + github_token=github_token, + repo=repo, + branch=branch, + raise_on_failure=raise_on_failure, + ) + super().__init__( + component=file_editor, + name=name, + description=description, + parameters=parameters, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. + """ + return default_to_dict( + self, + name=self.name, + description=self.description, + parameters=self.parameters, + github_token=self.github_token.to_dict(), + repo=self.repo, + branch=self.branch, + raise_on_failure=self.raise_on_failure, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. 
+ """ + deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) + return default_from_dict(cls, data) diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py new file mode 100644 index 0000000000..6a437e9ebb --- /dev/null +++ b/integrations/github/tests/test_file_editor_tool.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack.utils import Secret + +from haystack_integrations.prompts.github.file_editor_tool import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from haystack_integrations.tools.github.file_editor_tool import GitHubFileEditorTool + + +class TestGitHubFileEditorTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubFileEditorTool() + assert tool.name == "file_editor" + assert tool.description == FILE_EDITOR_PROMPT + assert tool.parameters == FILE_EDITOR_SCHEMA + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", + "init_parameters": { + "name": "file_editor", + "description": FILE_EDITOR_PROMPT, + "parameters": FILE_EDITOR_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "repo": None, + "branch": "main", + "raise_on_failure": True, + }, + } + tool = GitHubFileEditorTool.from_dict(tool_dict) + assert tool.name == "file_editor" + assert tool.description == FILE_EDITOR_PROMPT + assert tool.parameters == FILE_EDITOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubFileEditorTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == 
"haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool" + assert tool_dict["init_parameters"]["name"] == "file_editor" + assert tool_dict["init_parameters"]["description"] == FILE_EDITOR_PROMPT + assert tool_dict["init_parameters"]["parameters"] == FILE_EDITOR_SCHEMA + assert tool_dict["init_parameters"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["init_parameters"]["repo"] is None + assert tool_dict["init_parameters"]["branch"] == "main" + assert tool_dict["init_parameters"]["raise_on_failure"] From 0c4f0f00745bbe48357d936c97d8ad488618cb9d Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Fri, 2 May 2025 17:04:50 +0200 Subject: [PATCH 26/51] enforce kwargs instead of positional args --- .../components/connectors/github/file_editor.py | 1 + .../components/connectors/github/issue_commenter.py | 1 + .../components/connectors/github/issue_viewer.py | 1 + .../components/connectors/github/pr_creator.py | 2 +- .../components/connectors/github/repo_viewer.py | 1 + .../components/connectors/github/repository_forker.py | 1 + .../src/haystack_integrations/tools/github/file_editor_tool.py | 1 + 7 files changed, 7 insertions(+), 1 deletion(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py index 0278030151..1bc675bc3c 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -77,6 +77,7 @@ class GitHubFileEditor: def __init__( self, + *, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), repo: Optional[str] = None, branch: str = "main", diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py 
b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py index c130d94495..adbeb9a7da 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py @@ -37,6 +37,7 @@ class GitHubIssueCommenter: def __init__( self, + *, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, retry_attempts: int = 2, diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py index 27623665f3..619da1ee35 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py @@ -36,6 +36,7 @@ class GitHubIssueViewer: def __init__( self, + *, github_token: Optional[Secret] = None, raise_on_failure: bool = True, retry_attempts: int = 2, diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py index 9de618d7c5..7131802372 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py @@ -38,7 +38,7 @@ class GitHubPRCreator: ``` """ - def __init__(self, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True): + def __init__(self, *, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True): """ Initialize the component. 
diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index f0be799090..e1370378e4 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -70,6 +70,7 @@ class GitHubRepositoryViewer: def __init__( self, + *, github_token: Optional[Secret] = None, raise_on_failure: bool = True, max_file_size: int = 1_000_000, # 1MB default limit diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py index d9a4f70c87..57f27fd5df 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py @@ -39,6 +39,7 @@ class GitHubRepoForker: def __init__( self, + *, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, wait_for_completion: bool = False, diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index df728c1a71..2548f038b8 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -18,6 +18,7 @@ class GitHubFileEditorTool(ComponentTool): def __init__( self, + *, name: Optional[str] = "file_editor", description: Optional[str] = FILE_EDITOR_PROMPT, parameters: Optional[Dict[str, Any]] = FILE_EDITOR_SCHEMA, From 31add3586a86f9408a2090a11e8495261b5ba6ce Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Mon, 5 May 2025 11:03:04 +0200 
Subject: [PATCH 27/51] use _get_request_headers and base_headers consistently --- .../connectors/github/file_editor.py | 30 ++++++++----- .../connectors/github/issue_commenter.py | 4 +- .../connectors/github/issue_viewer.py | 4 +- .../connectors/github/pr_creator.py | 44 ++++++++++--------- .../connectors/github/repo_viewer.py | 19 +++++--- .../connectors/github/repository_forker.py | 32 ++++++++++---- 6 files changed, 84 insertions(+), 49 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py index 1bc675bc3c..cee4939cdf 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -100,16 +100,26 @@ def __init__( self.default_branch = branch self.raise_on_failure = raise_on_failure - self.headers = { + self.base_headers = { "Accept": "application/vnd.github.v3+json", - "Authorization": f"Bearer {self.github_token.resolve_value()}", "User-Agent": "Haystack/GitHubFileEditor", } + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. 
+ + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + def _get_file_content(self, owner: str, repo: str, path: str, branch: str) -> tuple[str, str]: """Get file content and SHA from GitHub.""" url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}" - response = requests.get(url, headers=self.headers, params={"ref": branch}, timeout=10) + response = requests.get(url, headers=self._get_request_headers(), params={"ref": branch}, timeout=10) response.raise_for_status() data = response.json() content = b64decode(data["content"]).decode("utf-8") @@ -124,7 +134,7 @@ def _update_file(self, owner: str, repo: str, path: str, content: str, message: "sha": sha, "branch": branch, } - response = requests.put(url, headers=self.headers, json=payload, timeout=10) + response = requests.put(url, headers=self._get_request_headers(), json=payload, timeout=10) response.raise_for_status() return True @@ -132,13 +142,13 @@ def _check_last_commit(self, owner: str, repo: str, branch: str) -> bool: """Check if last commit was made by the current token user.""" url = f"https://api.github.com/repos/{owner}/{repo}/commits" params: Dict[str, Union[str, int]] = {"per_page": 1, "sha": branch} - response = requests.get(url, headers=self.headers, params=params, timeout=10) + response = requests.get(url, headers=self._get_request_headers(), params=params, timeout=10) response.raise_for_status() last_commit = response.json()[0] commit_author = last_commit["author"]["login"] # Get current user - user_response = requests.get("https://api.github.com/user", headers=self.headers, timeout=10) + user_response = requests.get("https://api.github.com/user", headers=self._get_request_headers(), timeout=10) user_response.raise_for_status() current_user = user_response.json()["login"] @@ -178,12 +188,12 @@ 
def _undo_changes(self, owner: str, repo: str, payload: Dict[str, Any], branch: # Get the previous commit SHA params: Dict[str, Union[str, int]] = {"per_page": 2, "sha": branch} - commits = requests.get(commits_url, headers=self.headers, params=params, timeout=10).json() + commits = requests.get(commits_url, headers=self._get_request_headers(), params=params, timeout=10).json() previous_sha = commits[1]["sha"] # Update branch reference to previous commit payload = {"sha": previous_sha, "force": True} - response = requests.patch(url, headers=self.headers, json=payload, timeout=10) + response = requests.patch(url, headers=self._get_request_headers(), json=payload, timeout=10) response.raise_for_status() return "Successfully undid last change" @@ -201,7 +211,7 @@ def _create_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s data = {"message": payload["message"], "content": content, "branch": branch} - response = requests.put(url, headers=self.headers, json=data, timeout=10) + response = requests.put(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() return "File created successfully" @@ -218,7 +228,7 @@ def _delete_file(self, owner: str, repo: str, payload: Dict[str, str], branch: s data = {"message": payload["message"], "sha": sha, "branch": branch} - response = requests.delete(url, headers=self.headers, json=data, timeout=10) + response = requests.delete(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() return "File deleted successfully" diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py index adbeb9a7da..987ce95ad0 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py +++ 
b/integrations/github/src/haystack_integrations/components/connectors/github/issue_commenter.py @@ -54,7 +54,7 @@ def __init__( self.retry_attempts = retry_attempts # Set base headers during initialization - self.headers = { + self.base_headers = { "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GitHubIssueCommenter", } @@ -65,7 +65,7 @@ def _get_request_headers(self) -> dict: :return: Dictionary of headers including authorization if token is present """ - headers = self.headers.copy() + headers = self.base_headers.copy() if self.github_token is not None: headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" return headers diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py index 619da1ee35..5805ad59de 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/issue_viewer.py @@ -53,7 +53,7 @@ def __init__( self.retry_attempts = retry_attempts # Only set the basic headers during initialization - self.headers = { + self.base_headers = { "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GitHubIssueViewer", } @@ -64,7 +64,7 @@ def _get_request_headers(self) -> dict: :return: Dictionary of headers including authorization if token is present """ - headers = self.headers.copy() + headers = self.base_headers.copy() if self.github_token: headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" return headers diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py index 7131802372..ecd35f3823 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py +++ 
b/integrations/github/src/haystack_integrations/components/connectors/github/pr_creator.py @@ -52,18 +52,22 @@ def __init__(self, *, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN") self.github_token = github_token self.raise_on_failure = raise_on_failure - def _get_headers(self) -> Dict[str, str]: - """ - Get headers for GitHub API requests with resolved token. - - :return: Dictionary of request headers - """ - return { + self.base_headers = { "Accept": "application/vnd.github.v3+json", - "Authorization": f"Bearer {self.github_token.resolve_value()}", "User-Agent": "Haystack/GitHubPRCreator", } + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: """ Parse owner, repo name, and issue number from GitHub issue URL. 
@@ -81,7 +85,7 @@ def _parse_issue_url(self, issue_url: str) -> tuple[str, str, str]: def _get_authenticated_user(self) -> str: """Get the username of the authenticated user (fork owner).""" - response = requests.get("https://api.github.com/user", headers=self._get_headers(), timeout=10) + response = requests.get("https://api.github.com/user", headers=self._get_request_headers(), timeout=10) response.raise_for_status() return response.json()["login"] @@ -89,7 +93,7 @@ def _check_fork_exists(self, repo: str, fork_owner: str) -> bool: """Check if the fork exists.""" url = f"https://api.github.com/repos/{fork_owner}/{repo}" try: - response = requests.get(url, headers=self._get_headers(), timeout=10) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() fork_data = response.json() return fork_data.get("fork", False) @@ -100,7 +104,7 @@ def _create_fork(self, owner: str, repo: str) -> Optional[str]: """Create a fork of the repository.""" url = f"https://api.github.com/repos/{owner}/{repo}/forks" try: - response = requests.post(url, headers=self._get_headers(), timeout=10) + response = requests.post(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() fork_data = response.json() return fork_data["owner"]["login"] @@ -115,14 +119,14 @@ def _create_branch(self, owner: str, repo: str, branch_name: str, base_branch: s # Get the SHA of the base branch url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{base_branch}" try: - response = requests.get(url, headers=self._get_headers(), timeout=10) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() base_sha = response.json()["object"]["sha"] # Create the new branch url = f"https://api.github.com/repos/{owner}/{repo}/git/refs" data = {"ref": f"refs/heads/{branch_name}", "sha": base_sha} - response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response 
= requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() return True except requests.RequestException as e: @@ -144,14 +148,14 @@ def _create_commit( # Get the current commit SHA url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}" try: - response = requests.get(url, headers=self._get_headers(), timeout=10) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() current_sha = response.json()["object"]["sha"] # Create a blob with the file content url = f"https://api.github.com/repos/{owner}/{repo}/git/blobs" data: dict[str, Any] = {"content": content, "encoding": "base64"} - response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() blob_sha = response.json()["sha"] @@ -161,21 +165,21 @@ def _create_commit( "base_tree": current_sha, "tree": [{"path": file_path, "mode": "100644", "type": "blob", "sha": blob_sha}], } - response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() tree_sha = response.json()["sha"] # Create the commit url = f"https://api.github.com/repos/{owner}/{repo}/git/commits" data = {"message": message, "tree": tree_sha, "parents": [current_sha]} - response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() commit_sha = response.json()["sha"] # Update the branch reference url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/heads/{branch_name}" data = {"sha": commit_sha} - response = requests.patch(url, headers=self._get_headers(), json=data, timeout=10) + response = requests.patch(url, 
headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() return True except requests.RequestException as e: @@ -197,7 +201,7 @@ def _create_pull_request( url = f"https://api.github.com/repos/{owner}/{repo}/pulls" data = {"title": title, "body": body, "head": branch_name, "base": base_branch} try: - response = requests.post(url, headers=self._get_headers(), json=data, timeout=10) + response = requests.post(url, headers=self._get_request_headers(), json=data, timeout=10) response.raise_for_status() return True except requests.RequestException as e: @@ -246,7 +250,7 @@ def run( "maintainer_can_modify": True, # Allow maintainers to modify the PR } - response = requests.post(url, headers=self._get_headers(), json=pr_data, timeout=10) + response = requests.post(url, headers=self._get_request_headers(), json=pr_data, timeout=10) response.raise_for_status() pr_number = response.json()["number"] diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index e1370378e4..3fab022a91 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -96,11 +96,22 @@ def __init__( self.repo = repo self.branch = branch - self.headers = { + self.base_headers = { "Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GitHubRepositoryViewer", } + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + def to_dict(self) -> Dict[str, Any]: """ Serialize the component to a dictionary. 
@@ -146,11 +157,7 @@ def _fetch_contents(self, owner: str, repo: str, path: str, ref: str) -> Any: if ref: url += f"?ref={ref}" - headers = self.headers.copy() - if self.github_token: - headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" - - response = requests.get(url, headers=headers, timeout=10) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() return response.json() diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py index 57f27fd5df..c2846db677 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py @@ -71,7 +71,21 @@ def __init__( self.auto_sync = auto_sync self.create_branch = create_branch - self.headers = {"Accept": "application/vnd.github.v3+json", "User-Agent": "Haystack/GitHubRepoForker"} + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubRepoForker", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. 
+ + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers def _parse_github_url(self, url: str) -> tuple[str, str, str]: """ @@ -101,7 +115,7 @@ def _check_fork_status(self, fork_path: str) -> bool: try: response = requests.get( url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + headers=self._get_request_headers(), timeout=10, ) return response.status_code == 200 # noqa: PLR2004 @@ -117,7 +131,7 @@ def _get_authenticated_user(self) -> str: """ url = "https://api.github.com/user" response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 + url, headers=self._get_request_headers(), timeout=10 ) response.raise_for_status() return response.json()["login"] @@ -133,7 +147,7 @@ def _get_existing_repository(self, repo_name: str) -> Optional[str]: try: response = requests.get( url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + headers=self._get_request_headers(), timeout=10, ) if response.status_code == 200: # noqa: PLR2004 @@ -153,7 +167,7 @@ def _sync_fork(self, fork_path: str) -> None: url = f"https://api.github.com/repos/{fork_path}/merge-upstream" response = requests.post( url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + headers=self._get_request_headers(), json={"branch": "main"}, timeout=10, ) @@ -170,7 +184,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # First, get the default branch SHA url = f"https://api.github.com/repos/{fork_path}" response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 + url, headers=self._get_request_headers(), timeout=10 ) 
response.raise_for_status() default_branch = response.json()["default_branch"] @@ -178,7 +192,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: # Get the SHA of the default branch url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" response = requests.get( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 + url, headers=self._get_request_headers(), timeout=10 ) response.raise_for_status() sha = response.json()["object"]["sha"] @@ -188,7 +202,7 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: url = f"https://api.github.com/repos/{fork_path}/git/refs" response = requests.post( url, - headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, + headers=self._get_request_headers(), json={"ref": f"refs/heads/{branch_name}", "sha": sha}, timeout=10, ) @@ -205,7 +219,7 @@ def _create_fork(self, owner: str, repo: str) -> str: """ url = f"https://api.github.com/repos/{owner}/{repo}/forks" response = requests.post( - url, headers={**self.headers, "Authorization": f"Bearer {self.github_token.resolve_value()}"}, timeout=10 + url, headers=self._get_request_headers(), timeout=10 ) response.raise_for_status() From 474674c43f33bc6565d506609d861186040e5d9f Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Mon, 5 May 2025 11:49:16 +0200 Subject: [PATCH 28/51] lint --- .../connectors/github/repository_forker.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py index c2846db677..dc5188b90e 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py +++ 
b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py @@ -130,9 +130,7 @@ def _get_authenticated_user(self) -> str: :raises requests.RequestException: If API call fails """ url = "https://api.github.com/user" - response = requests.get( - url, headers=self._get_request_headers(), timeout=10 - ) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() return response.json()["login"] @@ -183,17 +181,13 @@ def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: """ # First, get the default branch SHA url = f"https://api.github.com/repos/{fork_path}" - response = requests.get( - url, headers=self._get_request_headers(), timeout=10 - ) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() default_branch = response.json()["default_branch"] # Get the SHA of the default branch url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" - response = requests.get( - url, headers=self._get_request_headers(), timeout=10 - ) + response = requests.get(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() sha = response.json()["object"]["sha"] @@ -218,9 +212,7 @@ def _create_fork(self, owner: str, repo: str) -> str: :raises requests.RequestException: If fork creation fails """ url = f"https://api.github.com/repos/{owner}/{repo}/forks" - response = requests.post( - url, headers=self._get_request_headers(), timeout=10 - ) + response = requests.post(url, headers=self._get_request_headers(), timeout=10) response.raise_for_status() fork_data = response.json() From b30b21f5f2c9d518d017fdf2e12409ae056536fa Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Mon, 5 May 2025 11:55:00 +0200 Subject: [PATCH 29/51] rename GitHubRepositoryViewer to GitHubRepoViewer --- .../components/connectors/github/__init__.py | 6 +- .../connectors/github/repo_viewer.py | 10 +- 
.../connectors/github/repository_forker.py | 306 ------------------ integrations/github/tests/test_repo_viewer.py | 32 +- .../github/tests/test_repository_forker.py | 263 --------------- 5 files changed, 24 insertions(+), 593 deletions(-) delete mode 100644 integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py delete mode 100644 integrations/github/tests/test_repository_forker.py diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py index 6fe1832023..93935d7fd0 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py @@ -5,8 +5,8 @@ from .issue_commenter import GitHubIssueCommenter from .issue_viewer import GitHubIssueViewer from .pr_creator import GitHubPRCreator -from .repo_viewer import GitHubRepositoryViewer -from .repository_forker import GitHubRepoForker +from .repo_viewer import GitHubRepoViewer +from .repo_forker import GitHubRepoForker __all__ = [ "Command", @@ -15,5 +15,5 @@ "GitHubIssueViewer", "GitHubPRCreator", "GitHubRepoForker", - "GitHubRepositoryViewer", + "GitHubRepoViewer", ] diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index 3fab022a91..2ddb34e7e9 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -25,7 +25,7 @@ class GitHubItem: @component -class GitHubRepositoryViewer: +class GitHubRepoViewer: """ Navigates and fetches content from GitHub repositories. 
@@ -46,9 +46,9 @@ class GitHubRepositoryViewer: ### Usage example ```python - from haystack_integrations.components.connectors.github import GitHubRepositoryViewer + from haystack_integrations.components.connectors.github import GitHubRepoViewer - viewer = GitHubRepositoryViewer() + viewer = GitHubRepoViewer() # List directory contents - returns multiple documents result = viewer.run( @@ -98,7 +98,7 @@ def __init__( self.base_headers = { "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GitHubRepositoryViewer", + "User-Agent": "Haystack/GitHubRepoViewer", } def _get_request_headers(self) -> dict: @@ -128,7 +128,7 @@ def to_dict(self) -> Dict[str, Any]: ) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepositoryViewer": + def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewer": """ Deserialize the component from a dictionary. diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py deleted file mode 100644 index dc5188b90e..0000000000 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repository_forker.py +++ /dev/null @@ -1,306 +0,0 @@ -# SPDX-FileCopyrightText: 2023-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -import re -from typing import Any, Dict, Optional - -import requests -from haystack import component, default_from_dict, default_to_dict, logging -from haystack.utils import Secret, deserialize_secrets_inplace - -logger = logging.getLogger(__name__) - - -@component -class GitHubRepoForker: - """ - Forks a GitHub repository from an issue URL. - - The component takes a GitHub issue URL, extracts the repository information, - creates or syncs a fork of that repository, and optionally creates an issue-specific branch. 
- - ### Usage example - ```python - from haystack_integrations.components.connectors.github import GitHubRepoForker - from haystack.utils import Secret - - # Using direct token with auto-sync and branch creation - forker = GitHubRepoForker( - github_token=Secret.from_env_var("GITHUB_TOKEN"), - auto_sync=True, - create_branch=True - ) - - result = forker.run(url="https://github.com/owner/repo/issues/123") - print(result) - # Will create or sync fork and create branch "fix-123" - ``` - """ - - def __init__( - self, - *, - github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), - raise_on_failure: bool = True, - wait_for_completion: bool = False, - max_wait_seconds: int = 300, - poll_interval: int = 2, - auto_sync: bool = True, - create_branch: bool = True, - ): - """ - Initialize the component. - - :param github_token: GitHub personal access token for API authentication - :param raise_on_failure: If True, raises exceptions on API errors - :param wait_for_completion: If True, waits until fork is fully created - :param max_wait_seconds: Maximum time to wait for fork completion in seconds - :param poll_interval: Time between status checks in seconds - :param auto_sync: If True, syncs fork with original repository if it already exists - :param create_branch: If True, creates a fix branch based on the issue number - """ - error_message = "github_token must be a Secret" - if not isinstance(github_token, Secret): - raise TypeError(error_message) - - self.github_token = github_token - self.raise_on_failure = raise_on_failure - self.wait_for_completion = wait_for_completion - self.max_wait_seconds = max_wait_seconds - self.poll_interval = poll_interval - self.auto_sync = auto_sync - self.create_branch = create_branch - - self.base_headers = { - "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GitHubRepoForker", - } - - def _get_request_headers(self) -> dict: - """ - Get headers with resolved token for the request. 
- - :return: Dictionary of headers including authorization if token is present - """ - headers = self.base_headers.copy() - if self.github_token is not None: - headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" - return headers - - def _parse_github_url(self, url: str) -> tuple[str, str, str]: - """ - Parse GitHub URL into owner, repo, and issue number. - - :param url: GitHub issue URL - :return: Tuple of (owner, repo, issue_number) - :raises ValueError: If URL format is invalid - """ - pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" - match = re.match(pattern, url) - if not match: - error_message = f"Invalid GitHub issue URL format: {url}" - raise ValueError(error_message) - - owner, repo, issue_number = match.groups() - return owner, repo, issue_number - - def _check_fork_status(self, fork_path: str) -> bool: - """ - Check if a forked repository exists and is ready. - - :param fork_path: Repository path in owner/repo format - :return: True if fork exists and is ready, False otherwise - """ - url = f"https://api.github.com/repos/{fork_path}" - try: - response = requests.get( - url, - headers=self._get_request_headers(), - timeout=10, - ) - return response.status_code == 200 # noqa: PLR2004 - except requests.RequestException: - return False - - def _get_authenticated_user(self) -> str: - """ - Get the authenticated user's username. - - :return: Username of the authenticated user - :raises requests.RequestException: If API call fails - """ - url = "https://api.github.com/user" - response = requests.get(url, headers=self._get_request_headers(), timeout=10) - response.raise_for_status() - return response.json()["login"] - - def _get_existing_repository(self, repo_name: str) -> Optional[str]: - """ - Check if a repository with the given name already exists in the authenticated user's account. 
- - :param repo_name: Repository name to check - :return: Full repository name if it exists, None otherwise - """ - url = f"https://api.github.com/repos/{self._get_authenticated_user()}/{repo_name}" - try: - response = requests.get( - url, - headers=self._get_request_headers(), - timeout=10, - ) - if response.status_code == 200: # noqa: PLR2004 - return repo_name - return None - except requests.RequestException as e: - logger.warning(f"Failed to check repository existence: {e!s}") - return None - - def _sync_fork(self, fork_path: str) -> None: - """ - Sync a fork with its upstream repository. - - :param fork_path: Fork path in owner/repo format - :raises requests.RequestException: If sync fails - """ - url = f"https://api.github.com/repos/{fork_path}/merge-upstream" - response = requests.post( - url, - headers=self._get_request_headers(), - json={"branch": "main"}, - timeout=10, - ) - response.raise_for_status() - - def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: - """ - Create a new branch for the issue. 
- - :param fork_path: Fork path in owner/repo format - :param issue_number: Issue number to use in branch name - :raises requests.RequestException: If branch creation fails - """ - # First, get the default branch SHA - url = f"https://api.github.com/repos/{fork_path}" - response = requests.get(url, headers=self._get_request_headers(), timeout=10) - response.raise_for_status() - default_branch = response.json()["default_branch"] - - # Get the SHA of the default branch - url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" - response = requests.get(url, headers=self._get_request_headers(), timeout=10) - response.raise_for_status() - sha = response.json()["object"]["sha"] - - # Create the new branch - branch_name = f"fix-{issue_number}" - url = f"https://api.github.com/repos/{fork_path}/git/refs" - response = requests.post( - url, - headers=self._get_request_headers(), - json={"ref": f"refs/heads/{branch_name}", "sha": sha}, - timeout=10, - ) - response.raise_for_status() - - def _create_fork(self, owner: str, repo: str) -> str: - """ - Create a fork of the repository. - - :param owner: Original repository owner - :param repo: Repository name - :return: Fork path in owner/repo format - :raises requests.RequestException: If fork creation fails - """ - url = f"https://api.github.com/repos/{owner}/{repo}/forks" - response = requests.post(url, headers=self._get_request_headers(), timeout=10) - response.raise_for_status() - - fork_data = response.json() - return f"{fork_data['owner']['login']}/{fork_data['name']}" - - def to_dict(self) -> Dict[str, Any]: - """ - Serialize the component to a dictionary. - - :returns: Dictionary with serialized data. 
- """ - return default_to_dict( - self, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure, - wait_for_completion=self.wait_for_completion, - max_wait_seconds=self.max_wait_seconds, - poll_interval=self.poll_interval, - auto_sync=self.auto_sync, - create_branch=self.create_branch, - ) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForker": - """ - Deserialize the component from a dictionary. - - :param data: Dictionary to deserialize from. - :returns: Deserialized component. - """ - init_params = data["init_parameters"] - deserialize_secrets_inplace(init_params, keys=["github_token"]) - return default_from_dict(cls, data) - - @component.output_types(repo=str, issue_branch=str) - def run(self, url: str) -> dict: - """ - Process a GitHub issue URL and create or sync a fork of the repository. - - :param url: GitHub issue URL - :return: Dictionary containing repository path in owner/repo format - """ - try: - # Extract repository information - owner, repo, issue_number = self._parse_github_url(url) - - # Check if fork already exists - user = self._get_authenticated_user() - existing_fork = self._get_existing_repository(repo) - - if existing_fork and self.auto_sync: - # If fork exists and auto_sync is enabled, sync with upstream - fork_path = f"{user}/{repo}" - logger.info("Fork already exists, syncing with upstream repository") - self._sync_fork(fork_path) - else: - # Create new fork - fork_path = self._create_fork(owner, repo) - - # Wait for fork completion if requested - if self.wait_for_completion: - import time - - start_time = time.time() - - while time.time() - start_time < self.max_wait_seconds: - if self._check_fork_status(fork_path): - logger.info("Fork creation completed successfully") - break - logger.debug("Waiting for fork creation to complete...") - time.sleep(self.poll_interval) - else: - msg = f"Fork creation timed out after {self.max_wait_seconds} seconds" - if 
self.raise_on_failure: - raise TimeoutError(msg) - logger.warning(msg) - - # Create issue branch if enabled - issue_branch = None - if self.create_branch: - issue_branch = f"fix-{issue_number}" - logger.info(f"Creating branch for issue #{issue_number}") - self._create_issue_branch(fork_path, issue_number) - - return {"repo": fork_path, "issue_branch": issue_branch} - - except Exception as e: - if self.raise_on_failure: - raise - logger.warning("Error forking repository from {url}: {error}", url=url, error=str(e)) - return {"repo": "", "issue_branch": None} diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index 36a196fe70..5557ca61d5 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -7,14 +7,14 @@ import requests from haystack.utils import Secret -from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepositoryViewer +from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer -class TestGitHubRepositoryViewer: +class TestGitHubRepoViewer: def test_init_default(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - viewer = GitHubRepositoryViewer() + viewer = GitHubRepoViewer() assert viewer.github_token is None assert viewer.raise_on_failure is True assert viewer.max_file_size == 1_000_000 @@ -23,7 +23,7 @@ def test_init_default(self, monkeypatch): def test_init_with_parameters(self): token = Secret.from_token("test-token") - viewer = GitHubRepositoryViewer( + viewer = GitHubRepoViewer( github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" ) assert viewer.github_token == token @@ -33,21 +33,21 @@ def test_init_with_parameters(self): assert viewer.branch == "main" with pytest.raises(TypeError): - GitHubRepositoryViewer(github_token="not_a_secret") + GitHubRepoViewer(github_token="not_a_secret") def test_to_dict(self, 
monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") token = Secret.from_env_var("ENV_VAR") - viewer = GitHubRepositoryViewer( + viewer = GitHubRepoViewer( github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" ) data = viewer.to_dict() assert data == { - "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepositoryViewer", + "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepoViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -61,7 +61,7 @@ def test_from_dict(self, monkeypatch): monkeypatch.setenv("ENV_VAR", "test-token") data = { - "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepositoryViewer", + "type": "haystack_integrations.components.connectors.github.repo_viewer.GitHubRepoViewer", "init_parameters": { "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, "raise_on_failure": False, @@ -71,7 +71,7 @@ def test_from_dict(self, monkeypatch): }, } - viewer = GitHubRepositoryViewer.from_dict(data) + viewer = GitHubRepoViewer.from_dict(data) assert viewer.github_token == Secret.from_env_var("ENV_VAR") assert viewer.raise_on_failure is False @@ -93,7 +93,7 @@ def test_run_file(self, mock_get, monkeypatch): } mock_get.return_value.raise_for_status.return_value = None - viewer = GitHubRepositoryViewer() + viewer = GitHubRepoViewer() result = viewer.run(repo="owner/repo", path="README.md", branch="main") @@ -106,7 +106,7 @@ def test_run_file(self, mock_get, monkeypatch): "https://api.github.com/repos/owner/repo/contents/README.md?ref=main", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GitHubRepositoryViewer", + "User-Agent": "Haystack/GitHubRepoViewer", }, timeout=10, ) @@ -127,7 +127,7 @@ def test_run_directory(self, mock_get, monkeypatch): ] 
mock_get.return_value.raise_for_status.return_value = None - viewer = GitHubRepositoryViewer() + viewer = GitHubRepoViewer() result = viewer.run(repo="owner/repo", path="", branch="main") @@ -141,7 +141,7 @@ def test_run_directory(self, mock_get, monkeypatch): "https://api.github.com/repos/owner/repo/contents/?ref=main", headers={ "Accept": "application/vnd.github.v3+json", - "User-Agent": "Haystack/GitHubRepositoryViewer", + "User-Agent": "Haystack/GitHubRepoViewer", }, timeout=10, ) @@ -152,21 +152,21 @@ def test_run_error_handling(self, mock_get, monkeypatch): mock_get.side_effect = requests.RequestException("API Error") - viewer = GitHubRepositoryViewer(raise_on_failure=False) + viewer = GitHubRepoViewer(raise_on_failure=False) result = viewer.run(repo="owner/repo", path="README.md", branch="main") assert len(result["documents"]) == 1 assert result["documents"][0].meta["type"] == "error" - viewer = GitHubRepositoryViewer(raise_on_failure=True) + viewer = GitHubRepoViewer(raise_on_failure=True) with pytest.raises(requests.RequestException): viewer.run(repo="owner/repo", path="README.md", branch="main") def test_parse_repo(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") - viewer = GitHubRepositoryViewer() + viewer = GitHubRepoViewer() owner, repo = viewer._parse_repo("owner/repo") assert owner == "owner" diff --git a/integrations/github/tests/test_repository_forker.py b/integrations/github/tests/test_repository_forker.py deleted file mode 100644 index 766d43ab7a..0000000000 --- a/integrations/github/tests/test_repository_forker.py +++ /dev/null @@ -1,263 +0,0 @@ -# SPDX-FileCopyrightText: 2023-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -from unittest.mock import patch - -import pytest -import requests -from haystack.utils import Secret - -from haystack_integrations.components.connectors.github.repository_forker import GitHubRepoForker - - -class TestGitHubRepoForker: - def test_init_default(self, monkeypatch): - 
monkeypatch.setenv("GITHUB_TOKEN", "test-token") - - forker = GitHubRepoForker() - assert forker.github_token is not None - assert forker.github_token.resolve_value() == "test-token" - assert forker.raise_on_failure is True - assert forker.wait_for_completion is False - assert forker.max_wait_seconds == 300 - assert forker.poll_interval == 2 - assert forker.auto_sync is True - assert forker.create_branch is True - - def test_init_with_parameters(self): - token = Secret.from_token("test-token") - forker = GitHubRepoForker( - github_token=token, - raise_on_failure=False, - wait_for_completion=True, - max_wait_seconds=60, - poll_interval=1, - auto_sync=False, - create_branch=False, - ) - assert forker.github_token == token - assert forker.raise_on_failure is False - assert forker.wait_for_completion is True - assert forker.max_wait_seconds == 60 - assert forker.poll_interval == 1 - assert forker.auto_sync is False - assert forker.create_branch is False - - # Test with invalid token type - with pytest.raises(TypeError): - GitHubRepoForker(github_token="not_a_secret") - - def test_to_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test-token") - - token = Secret.from_env_var("ENV_VAR") - - forker = GitHubRepoForker( - github_token=token, - raise_on_failure=False, - wait_for_completion=True, - max_wait_seconds=60, - poll_interval=1, - auto_sync=False, - create_branch=False, - ) - - data = forker.to_dict() - - assert data == { - "type": "haystack_integrations.components.connectors.github.repository_forker.GitHubRepoForker", - "init_parameters": { - "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, - "raise_on_failure": False, - "wait_for_completion": True, - "max_wait_seconds": 60, - "poll_interval": 1, - "auto_sync": False, - "create_branch": False, - }, - } - - def test_from_dict(self, monkeypatch): - monkeypatch.setenv("ENV_VAR", "test-token") - - data = { - "type": 
"haystack_integrations.components.connectors.github.repository_forker.GitHubRepoForker", - "init_parameters": { - "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, - "raise_on_failure": False, - "wait_for_completion": True, - "max_wait_seconds": 60, - "poll_interval": 1, - "auto_sync": False, - "create_branch": False, - }, - } - - forker = GitHubRepoForker.from_dict(data) - - assert forker.github_token == Secret.from_env_var("ENV_VAR") - assert forker.raise_on_failure is False - assert forker.wait_for_completion is True - assert forker.max_wait_seconds == 60 - assert forker.poll_interval == 1 - assert forker.auto_sync is False - assert forker.create_branch is False - - @patch("requests.get") - @patch("requests.post") - def test_run_create_fork(self, mock_post, mock_get, monkeypatch): - monkeypatch.setenv("GITHUB_TOKEN", "test-token") - - def create_mock_response(json_data, status_code=200): - class MockResponse: - def __init__(self, data, code): - self._data = data - self.status_code = code - - def json(self): - return self._data - - def raise_for_status(self): - if self.status_code >= 400: - error_message = f"HTTP {self.status_code}" - raise requests.RequestException(error_message) - - return MockResponse(json_data, status_code) - - get_responses = { - "https://api.github.com/user": create_mock_response({"login": "test_user"}), - "https://api.github.com/repos/test_user/repo": create_mock_response( - {}, status_code=404 - ), # Fork doesn't exist - "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( - {"object": {"sha": "abc123"}} - ), - } - - def get_side_effect(url, **_): - if url == "https://api.github.com/repos/test_user/repo": - if mock_get.call_count == 2: - return create_mock_response({}, status_code=404) # Fork doesn't exist - return create_mock_response({"default_branch": "main"}) - return get_responses.get(url, create_mock_response({"default_branch": "main"})) - - mock_get.side_effect = 
get_side_effect - - def post_side_effect(url, **_): - if "forks" in url: - return create_mock_response({"owner": {"login": "test_user"}, "name": "repo"}) - return create_mock_response({}) - - mock_post.side_effect = post_side_effect - - forker = GitHubRepoForker(create_branch=True, auto_sync=False) - - result = forker.run(url="https://github.com/owner/repo/issues/123") - - assert result["repo"] == "test_user/repo" - assert result["issue_branch"] == "fix-123" - - assert mock_get.call_count == 5 # user (2x), check fork status, get default branch, get SHA - - get_calls = [call[0][0] for call in mock_get.call_args_list] - assert get_calls.count("https://api.github.com/user") == 2 # get user, check fork - assert get_calls.count("https://api.github.com/repos/test_user/repo") == 2 # check status, get default branch - assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls - - post_calls = [call[0][0] for call in mock_post.call_args_list] - assert "https://api.github.com/repos/owner/repo/forks" in post_calls - assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls - assert mock_post.call_count == 2 # One for fork creation, one for branch creation - - @patch("requests.get") - @patch("requests.post") - def test_run_sync_existing_fork(self, mock_post, mock_get, monkeypatch): - monkeypatch.setenv("GITHUB_TOKEN", "test-token") - - def create_mock_response(json_data, status_code=200): - class MockResponse: - def __init__(self, data, code): - self._data = data - self.status_code = code - - def json(self): - return self._data - - def raise_for_status(self): - if self.status_code >= 400: - error_message = f"HTTP {self.status_code}" - raise requests.RequestException(error_message) - - return MockResponse(json_data, status_code) - - get_responses = { - "https://api.github.com/user": create_mock_response({"login": "test_user"}), - "https://api.github.com/repos/test_user/repo": create_mock_response( - {"name": "repo", "default_branch": 
"main"} - ), - "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( - {"object": {"sha": "abc123"}} - ), - } - - def get_side_effect(url, **_): - return get_responses.get(url, create_mock_response({"default_branch": "main"})) - - mock_get.side_effect = get_side_effect - - def post_side_effect(url, **_): - if "merge-upstream" in url: - return create_mock_response({}) - return create_mock_response({}) - - mock_post.side_effect = post_side_effect - - forker = GitHubRepoForker(create_branch=True, auto_sync=True) - - result = forker.run(url="https://github.com/owner/repo/issues/123") - - assert result["repo"] == "test_user/repo" - assert result["issue_branch"] == "fix-123" - - assert mock_get.call_count == 5 # user, check fork, check fork status, get default branch, get SHA - - get_calls = [call[0][0] for call in mock_get.call_args_list] - assert "https://api.github.com/user" in get_calls - assert "https://api.github.com/repos/test_user/repo" in get_calls - assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls - - post_calls = [call[0][0] for call in mock_post.call_args_list] - assert "https://api.github.com/repos/test_user/repo/merge-upstream" in post_calls - assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls - assert mock_post.call_count == 2 # One for sync, one for branch creation - - @patch("requests.get") - @patch("requests.post") - def test_run_error_handling(self, _, mock_get, monkeypatch): - monkeypatch.setenv("GITHUB_TOKEN", "test-token") - - mock_get.side_effect = requests.RequestException("API Error") - - forker = GitHubRepoForker(raise_on_failure=False) - - result = forker.run(url="https://github.com/owner/repo/issues/123") - - assert result["repo"] == "" - assert result["issue_branch"] is None - - forker = GitHubRepoForker(raise_on_failure=True) - with pytest.raises(requests.RequestException): - forker.run(url="https://github.com/owner/repo/issues/123") - - def 
test_parse_github_url(self, monkeypatch): - monkeypatch.setenv("GITHUB_TOKEN", "test-token") - - forker = GitHubRepoForker() - - owner, repo, issue_number = forker._parse_github_url("https://github.com/owner/repo/issues/123") - assert owner == "owner" - assert repo == "repo" - assert issue_number == "123" - - with pytest.raises(ValueError): - forker._parse_github_url("https://github.com/invalid/url") From 4f8d0d3a89314d79bd2e07e9fa6fd9950c59f120 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Mon, 5 May 2025 12:00:48 +0200 Subject: [PATCH 30/51] lint --- .../components/connectors/github/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py index 93935d7fd0..ea4a4969b4 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/__init__.py @@ -5,8 +5,8 @@ from .issue_commenter import GitHubIssueCommenter from .issue_viewer import GitHubIssueViewer from .pr_creator import GitHubPRCreator -from .repo_viewer import GitHubRepoViewer from .repo_forker import GitHubRepoForker +from .repo_viewer import GitHubRepoViewer __all__ = [ "Command", From 4555d2ab25f2450f8fe9c2ef2faff830050d4bac Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Mon, 5 May 2025 14:28:14 +0200 Subject: [PATCH 31/51] add pipeline serialization test --- .../connectors/github/repo_forker.py | 306 ++++++++++++++++++ .../github/tests/test_file_editor_tool.py | 42 ++- integrations/github/tests/test_repo_forker.py | 263 +++++++++++++++ 3 files changed, 610 insertions(+), 1 deletion(-) create mode 100644 integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py create mode 100644 integrations/github/tests/test_repo_forker.py diff --git 
a/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py new file mode 100644 index 0000000000..dc5188b90e --- /dev/null +++ b/integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py @@ -0,0 +1,306 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +import re +from typing import Any, Dict, Optional + +import requests +from haystack import component, default_from_dict, default_to_dict, logging +from haystack.utils import Secret, deserialize_secrets_inplace + +logger = logging.getLogger(__name__) + + +@component +class GitHubRepoForker: + """ + Forks a GitHub repository from an issue URL. + + The component takes a GitHub issue URL, extracts the repository information, + creates or syncs a fork of that repository, and optionally creates an issue-specific branch. + + ### Usage example + ```python + from haystack_integrations.components.connectors.github import GitHubRepoForker + from haystack.utils import Secret + + # Using direct token with auto-sync and branch creation + forker = GitHubRepoForker( + github_token=Secret.from_env_var("GITHUB_TOKEN"), + auto_sync=True, + create_branch=True + ) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + print(result) + # Will create or sync fork and create branch "fix-123" + ``` + """ + + def __init__( + self, + *, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + wait_for_completion: bool = False, + max_wait_seconds: int = 300, + poll_interval: int = 2, + auto_sync: bool = True, + create_branch: bool = True, + ): + """ + Initialize the component. 
+ + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param wait_for_completion: If True, waits until fork is fully created + :param max_wait_seconds: Maximum time to wait for fork completion in seconds + :param poll_interval: Time between status checks in seconds + :param auto_sync: If True, syncs fork with original repository if it already exists + :param create_branch: If True, creates a fix branch based on the issue number + """ + error_message = "github_token must be a Secret" + if not isinstance(github_token, Secret): + raise TypeError(error_message) + + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.wait_for_completion = wait_for_completion + self.max_wait_seconds = max_wait_seconds + self.poll_interval = poll_interval + self.auto_sync = auto_sync + self.create_branch = create_branch + + self.base_headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "Haystack/GitHubRepoForker", + } + + def _get_request_headers(self) -> dict: + """ + Get headers with resolved token for the request. + + :return: Dictionary of headers including authorization if token is present + """ + headers = self.base_headers.copy() + if self.github_token is not None: + headers["Authorization"] = f"Bearer {self.github_token.resolve_value()}" + return headers + + def _parse_github_url(self, url: str) -> tuple[str, str, str]: + """ + Parse GitHub URL into owner, repo, and issue number. 
+ + :param url: GitHub issue URL + :return: Tuple of (owner, repo, issue_number) + :raises ValueError: If URL format is invalid + """ + pattern = r"https?://github\.com/([^/]+)/([^/]+)/issues/(\d+)" + match = re.match(pattern, url) + if not match: + error_message = f"Invalid GitHub issue URL format: {url}" + raise ValueError(error_message) + + owner, repo, issue_number = match.groups() + return owner, repo, issue_number + + def _check_fork_status(self, fork_path: str) -> bool: + """ + Check if a forked repository exists and is ready. + + :param fork_path: Repository path in owner/repo format + :return: True if fork exists and is ready, False otherwise + """ + url = f"https://api.github.com/repos/{fork_path}" + try: + response = requests.get( + url, + headers=self._get_request_headers(), + timeout=10, + ) + return response.status_code == 200 # noqa: PLR2004 + except requests.RequestException: + return False + + def _get_authenticated_user(self) -> str: + """ + Get the authenticated user's username. + + :return: Username of the authenticated user + :raises requests.RequestException: If API call fails + """ + url = "https://api.github.com/user" + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + return response.json()["login"] + + def _get_existing_repository(self, repo_name: str) -> Optional[str]: + """ + Check if a repository with the given name already exists in the authenticated user's account. 
+ + :param repo_name: Repository name to check + :return: Full repository name if it exists, None otherwise + """ + url = f"https://api.github.com/repos/{self._get_authenticated_user()}/{repo_name}" + try: + response = requests.get( + url, + headers=self._get_request_headers(), + timeout=10, + ) + if response.status_code == 200: # noqa: PLR2004 + return repo_name + return None + except requests.RequestException as e: + logger.warning(f"Failed to check repository existence: {e!s}") + return None + + def _sync_fork(self, fork_path: str) -> None: + """ + Sync a fork with its upstream repository. + + :param fork_path: Fork path in owner/repo format + :raises requests.RequestException: If sync fails + """ + url = f"https://api.github.com/repos/{fork_path}/merge-upstream" + response = requests.post( + url, + headers=self._get_request_headers(), + json={"branch": "main"}, + timeout=10, + ) + response.raise_for_status() + + def _create_issue_branch(self, fork_path: str, issue_number: str) -> None: + """ + Create a new branch for the issue. 
+ + :param fork_path: Fork path in owner/repo format + :param issue_number: Issue number to use in branch name + :raises requests.RequestException: If branch creation fails + """ + # First, get the default branch SHA + url = f"https://api.github.com/repos/{fork_path}" + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + default_branch = response.json()["default_branch"] + + # Get the SHA of the default branch + url = f"https://api.github.com/repos/{fork_path}/git/ref/heads/{default_branch}" + response = requests.get(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + sha = response.json()["object"]["sha"] + + # Create the new branch + branch_name = f"fix-{issue_number}" + url = f"https://api.github.com/repos/{fork_path}/git/refs" + response = requests.post( + url, + headers=self._get_request_headers(), + json={"ref": f"refs/heads/{branch_name}", "sha": sha}, + timeout=10, + ) + response.raise_for_status() + + def _create_fork(self, owner: str, repo: str) -> str: + """ + Create a fork of the repository. + + :param owner: Original repository owner + :param repo: Repository name + :return: Fork path in owner/repo format + :raises requests.RequestException: If fork creation fails + """ + url = f"https://api.github.com/repos/{owner}/{repo}/forks" + response = requests.post(url, headers=self._get_request_headers(), timeout=10) + response.raise_for_status() + + fork_data = response.json() + return f"{fork_data['owner']['login']}/{fork_data['name']}" + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize the component to a dictionary. + + :returns: Dictionary with serialized data. 
+ """ + return default_to_dict( + self, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + wait_for_completion=self.wait_for_completion, + max_wait_seconds=self.max_wait_seconds, + poll_interval=self.poll_interval, + auto_sync=self.auto_sync, + create_branch=self.create_branch, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForker": + """ + Deserialize the component from a dictionary. + + :param data: Dictionary to deserialize from. + :returns: Deserialized component. + """ + init_params = data["init_parameters"] + deserialize_secrets_inplace(init_params, keys=["github_token"]) + return default_from_dict(cls, data) + + @component.output_types(repo=str, issue_branch=str) + def run(self, url: str) -> dict: + """ + Process a GitHub issue URL and create or sync a fork of the repository. + + :param url: GitHub issue URL + :return: Dictionary containing repository path in owner/repo format + """ + try: + # Extract repository information + owner, repo, issue_number = self._parse_github_url(url) + + # Check if fork already exists + user = self._get_authenticated_user() + existing_fork = self._get_existing_repository(repo) + + if existing_fork and self.auto_sync: + # If fork exists and auto_sync is enabled, sync with upstream + fork_path = f"{user}/{repo}" + logger.info("Fork already exists, syncing with upstream repository") + self._sync_fork(fork_path) + else: + # Create new fork + fork_path = self._create_fork(owner, repo) + + # Wait for fork completion if requested + if self.wait_for_completion: + import time + + start_time = time.time() + + while time.time() - start_time < self.max_wait_seconds: + if self._check_fork_status(fork_path): + logger.info("Fork creation completed successfully") + break + logger.debug("Waiting for fork creation to complete...") + time.sleep(self.poll_interval) + else: + msg = f"Fork creation timed out after {self.max_wait_seconds} seconds" + if 
self.raise_on_failure: + raise TimeoutError(msg) + logger.warning(msg) + + # Create issue branch if enabled + issue_branch = None + if self.create_branch: + issue_branch = f"fix-{issue_number}" + logger.info(f"Creating branch for issue #{issue_number}") + self._create_issue_branch(fork_path, issue_number) + + return {"repo": fork_path, "issue_branch": issue_branch} + + except Exception as e: + if self.raise_on_failure: + raise + logger.warning("Error forking repository from {url}: {error}", url=url, error=str(e)) + return {"repo": "", "issue_branch": None} diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py index 6a437e9ebb..496d27dd07 100644 --- a/integrations/github/tests/test_file_editor_tool.py +++ b/integrations/github/tests/test_file_editor_tool.py @@ -1,7 +1,9 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 - +from haystack import Pipeline +from haystack.components.agents import Agent +from haystack.components.generators.chat import OpenAIChatGenerator from haystack.utils import Secret from haystack_integrations.prompts.github.file_editor_tool import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA @@ -55,3 +57,41 @@ def test_to_dict(self, monkeypatch): assert tool_dict["init_parameters"]["repo"] is None assert tool_dict["init_parameters"]["branch"] == "main" assert tool_dict["init_parameters"]["raise_on_failure"] + + def test_pipeline_serialization(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + monkeypatch.setenv("OPENAI_API_KEY", "test-token") + + file_editor = GitHubFileEditorTool() + + agent = Agent( + chat_generator=OpenAIChatGenerator(), + tools=[file_editor], + ) + + pipeline = Pipeline() + pipeline.add_component("agent", agent) + + pipeline_dict = pipeline.to_dict() + + deserialized_pipeline = Pipeline.from_dict(pipeline_dict) + + deserialized_components = [instance for _, instance in 
deserialized_pipeline.graph.nodes(data="instance")] + + deserialized_agent_component = deserialized_components[0] + + assert isinstance(deserialized_agent_component, Agent) + + agent_tools = deserialized_agent_component.tools + assert len(agent_tools) == 1 + assert agent_tools[0].name == "file_editor" + assert isinstance(agent_tools[0], GitHubFileEditorTool) + + # Verify the tool's parameters were preserved + assert agent_tools[0].name == "file_editor" + assert agent_tools[0].description == FILE_EDITOR_PROMPT + assert agent_tools[0].parameters == FILE_EDITOR_SCHEMA + assert agent_tools[0].github_token == Secret.from_env_var("GITHUB_TOKEN") + assert agent_tools[0].repo is None + assert agent_tools[0].branch == "main" + assert agent_tools[0].raise_on_failure diff --git a/integrations/github/tests/test_repo_forker.py b/integrations/github/tests/test_repo_forker.py new file mode 100644 index 0000000000..6355a3bd53 --- /dev/null +++ b/integrations/github/tests/test_repo_forker.py @@ -0,0 +1,263 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from unittest.mock import patch + +import pytest +import requests +from haystack.utils import Secret + +from haystack_integrations.components.connectors.github.repo_forker import GitHubRepoForker + + +class TestGitHubRepoForker: + def test_init_default(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + forker = GitHubRepoForker() + assert forker.github_token is not None + assert forker.github_token.resolve_value() == "test-token" + assert forker.raise_on_failure is True + assert forker.wait_for_completion is False + assert forker.max_wait_seconds == 300 + assert forker.poll_interval == 2 + assert forker.auto_sync is True + assert forker.create_branch is True + + def test_init_with_parameters(self): + token = Secret.from_token("test-token") + forker = GitHubRepoForker( + github_token=token, + raise_on_failure=False, + wait_for_completion=True, + 
max_wait_seconds=60, + poll_interval=1, + auto_sync=False, + create_branch=False, + ) + assert forker.github_token == token + assert forker.raise_on_failure is False + assert forker.wait_for_completion is True + assert forker.max_wait_seconds == 60 + assert forker.poll_interval == 1 + assert forker.auto_sync is False + assert forker.create_branch is False + + # Test with invalid token type + with pytest.raises(TypeError): + GitHubRepoForker(github_token="not_a_secret") + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + token = Secret.from_env_var("ENV_VAR") + + forker = GitHubRepoForker( + github_token=token, + raise_on_failure=False, + wait_for_completion=True, + max_wait_seconds=60, + poll_interval=1, + auto_sync=False, + create_branch=False, + ) + + data = forker.to_dict() + + assert data == { + "type": "haystack_integrations.components.connectors.github.repo_forker.GitHubRepoForker", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "wait_for_completion": True, + "max_wait_seconds": 60, + "poll_interval": 1, + "auto_sync": False, + "create_branch": False, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-token") + + data = { + "type": "haystack_integrations.components.connectors.github.repo_forker.GitHubRepoForker", + "init_parameters": { + "github_token": {"env_vars": ["ENV_VAR"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "wait_for_completion": True, + "max_wait_seconds": 60, + "poll_interval": 1, + "auto_sync": False, + "create_branch": False, + }, + } + + forker = GitHubRepoForker.from_dict(data) + + assert forker.github_token == Secret.from_env_var("ENV_VAR") + assert forker.raise_on_failure is False + assert forker.wait_for_completion is True + assert forker.max_wait_seconds == 60 + assert forker.poll_interval == 1 + assert forker.auto_sync is False + assert 
forker.create_branch is False + + @patch("requests.get") + @patch("requests.post") + def test_run_create_fork(self, mock_post, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def raise_for_status(self): + if self.status_code >= 400: + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) + + return MockResponse(json_data, status_code) + + get_responses = { + "https://api.github.com/user": create_mock_response({"login": "test_user"}), + "https://api.github.com/repos/test_user/repo": create_mock_response( + {}, status_code=404 + ), # Fork doesn't exist + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( + {"object": {"sha": "abc123"}} + ), + } + + def get_side_effect(url, **_): + if url == "https://api.github.com/repos/test_user/repo": + if mock_get.call_count == 2: + return create_mock_response({}, status_code=404) # Fork doesn't exist + return create_mock_response({"default_branch": "main"}) + return get_responses.get(url, create_mock_response({"default_branch": "main"})) + + mock_get.side_effect = get_side_effect + + def post_side_effect(url, **_): + if "forks" in url: + return create_mock_response({"owner": {"login": "test_user"}, "name": "repo"}) + return create_mock_response({}) + + mock_post.side_effect = post_side_effect + + forker = GitHubRepoForker(create_branch=True, auto_sync=False) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "test_user/repo" + assert result["issue_branch"] == "fix-123" + + assert mock_get.call_count == 5 # user (2x), check fork status, get default branch, get SHA + + get_calls = [call[0][0] for call in mock_get.call_args_list] + assert 
get_calls.count("https://api.github.com/user") == 2 # get user, check fork + assert get_calls.count("https://api.github.com/repos/test_user/repo") == 2 # check status, get default branch + assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls + + post_calls = [call[0][0] for call in mock_post.call_args_list] + assert "https://api.github.com/repos/owner/repo/forks" in post_calls + assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls + assert mock_post.call_count == 2 # One for fork creation, one for branch creation + + @patch("requests.get") + @patch("requests.post") + def test_run_sync_existing_fork(self, mock_post, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + def create_mock_response(json_data, status_code=200): + class MockResponse: + def __init__(self, data, code): + self._data = data + self.status_code = code + + def json(self): + return self._data + + def raise_for_status(self): + if self.status_code >= 400: + error_message = f"HTTP {self.status_code}" + raise requests.RequestException(error_message) + + return MockResponse(json_data, status_code) + + get_responses = { + "https://api.github.com/user": create_mock_response({"login": "test_user"}), + "https://api.github.com/repos/test_user/repo": create_mock_response( + {"name": "repo", "default_branch": "main"} + ), + "https://api.github.com/repos/test_user/repo/git/ref/heads/main": create_mock_response( + {"object": {"sha": "abc123"}} + ), + } + + def get_side_effect(url, **_): + return get_responses.get(url, create_mock_response({"default_branch": "main"})) + + mock_get.side_effect = get_side_effect + + def post_side_effect(url, **_): + if "merge-upstream" in url: + return create_mock_response({}) + return create_mock_response({}) + + mock_post.side_effect = post_side_effect + + forker = GitHubRepoForker(create_branch=True, auto_sync=True) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert 
result["repo"] == "test_user/repo" + assert result["issue_branch"] == "fix-123" + + assert mock_get.call_count == 5 # user, check fork, check fork status, get default branch, get SHA + + get_calls = [call[0][0] for call in mock_get.call_args_list] + assert "https://api.github.com/user" in get_calls + assert "https://api.github.com/repos/test_user/repo" in get_calls + assert "https://api.github.com/repos/test_user/repo/git/ref/heads/main" in get_calls + + post_calls = [call[0][0] for call in mock_post.call_args_list] + assert "https://api.github.com/repos/test_user/repo/merge-upstream" in post_calls + assert "https://api.github.com/repos/test_user/repo/git/refs" in post_calls + assert mock_post.call_count == 2 # One for sync, one for branch creation + + @patch("requests.get") + @patch("requests.post") + def test_run_error_handling(self, _, mock_get, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + mock_get.side_effect = requests.RequestException("API Error") + + forker = GitHubRepoForker(raise_on_failure=False) + + result = forker.run(url="https://github.com/owner/repo/issues/123") + + assert result["repo"] == "" + assert result["issue_branch"] is None + + forker = GitHubRepoForker(raise_on_failure=True) + with pytest.raises(requests.RequestException): + forker.run(url="https://github.com/owner/repo/issues/123") + + def test_parse_github_url(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + forker = GitHubRepoForker() + + owner, repo, issue_number = forker._parse_github_url("https://github.com/owner/repo/issues/123") + assert owner == "owner" + assert repo == "repo" + assert issue_number == "123" + + with pytest.raises(ValueError): + forker._parse_github_url("https://github.com/invalid/url") From 8f4bf9f11cb2650ebf5d08b17f04da610aafe94e Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 6 May 2025 09:06:39 +0200 Subject: [PATCH 32/51] extend pipeline to_dict test --- integrations/github/pydoc/config.yml | 2 +- 
.../github/tests/test_file_editor_tool.py | 69 +++++++++++++++++-- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/integrations/github/pydoc/config.yml b/integrations/github/pydoc/config.yml index ae7fb568f5..040be3b93f 100644 --- a/integrations/github/pydoc/config.yml +++ b/integrations/github/pydoc/config.yml @@ -7,7 +7,7 @@ loaders: "haystack_integrations.components.connectors.github.issue_viewer", "haystack_integrations.components.connectors.github.pr_creator", "haystack_integrations.components.connectors.github.repo_viewer", - "haystack_integrations.components.connectors.github.repository_forker", + "haystack_integrations.components.connectors.github.repo_forker", ] ignore_when_discovered: ["__init__"] processors: diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py index 496d27dd07..a20c4e2db9 100644 --- a/integrations/github/tests/test_file_editor_tool.py +++ b/integrations/github/tests/test_file_editor_tool.py @@ -74,18 +74,73 @@ def test_pipeline_serialization(self, monkeypatch): pipeline_dict = pipeline.to_dict() - deserialized_pipeline = Pipeline.from_dict(pipeline_dict) + assert pipeline_dict == { + "metadata": {}, + "max_runs_per_component": 100, + "components": { + "agent": { + "type": "haystack.components.agents.agent.Agent", + "init_parameters": { + "chat_generator": { + "type": "haystack.components.generators.chat.openai.OpenAIChatGenerator", + "init_parameters": { + "model": "gpt-4o-mini", + "streaming_callback": None, + "api_base_url": None, + "organization": None, + "generation_kwargs": {}, + "api_key": { + "env_vars": ["OPENAI_API_KEY"], + "strict": True, + "type": "env_var" + }, + "timeout": None, + "max_retries": None, + "tools": None, + "tools_strict": False, + "http_client_kwargs": None + } + }, + "tools": [ + { + "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", + "init_parameters": { + "name": "file_editor", + "description": 
FILE_EDITOR_PROMPT, + "parameters": FILE_EDITOR_SCHEMA, + "github_token": { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var" + }, + "repo": None, + "branch": "main", + "raise_on_failure": True, + } + } + ], + "system_prompt": None, + "exit_conditions": ["text"], + "state_schema": {}, + "max_agent_steps": 100, + "raise_on_tool_invocation_failure": False, + "streaming_callback": None + } + } + }, + "connections": [], + "connection_type_validation": True + } + deserialized_pipeline = Pipeline.from_dict(pipeline_dict) deserialized_components = [instance for _, instance in deserialized_pipeline.graph.nodes(data="instance")] + deserialized_agent = deserialized_components[0] + assert isinstance(deserialized_agent, Agent) - deserialized_agent_component = deserialized_components[0] - - assert isinstance(deserialized_agent_component, Agent) - - agent_tools = deserialized_agent_component.tools + agent_tools = deserialized_agent.tools assert len(agent_tools) == 1 - assert agent_tools[0].name == "file_editor" assert isinstance(agent_tools[0], GitHubFileEditorTool) + assert agent_tools[0].name == "file_editor" # Verify the tool's parameters were preserved assert agent_tools[0].name == "file_editor" From ec64b097ff559807d8357198be2a97e9f579853a Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 6 May 2025 09:28:39 +0200 Subject: [PATCH 33/51] set default branch of repo viewer --- .../components/connectors/github/repo_viewer.py | 5 +---- .../prompts/github/repo_viewer_tool.py | 4 ++++ integrations/github/tests/test_repo_viewer.py | 14 +++++++------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py index 2ddb34e7e9..4ae4d02115 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py +++ 
b/integrations/github/src/haystack_integrations/components/connectors/github/repo_viewer.py @@ -75,7 +75,7 @@ def __init__( raise_on_failure: bool = True, max_file_size: int = 1_000_000, # 1MB default limit repo: Optional[str] = None, - branch: Optional[str] = None, + branch: str = "main", ): """ Initialize the component. @@ -221,9 +221,6 @@ def run(self, path: str, repo: Optional[str] = None, branch: Optional[str] = Non raise ValueError(msg) if branch is None: branch = self.branch - if branch is None: - msg = "Branch not provided in initialization or run() method" - raise ValueError(msg) try: owner, repo_name = self._parse_repo(repo) diff --git a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py index 5fbe82ca16..081534f515 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py @@ -71,6 +71,10 @@ "type": "string", "description": "Path to directory or file to view. Defaults to repository root.", }, + "branch": { + "type": "string", + "description": "Branch to view. 
Defaults to 'main'.", + }, }, "required": ["repo"], "type": "object", diff --git a/integrations/github/tests/test_repo_viewer.py b/integrations/github/tests/test_repo_viewer.py index 5557ca61d5..4bfdda1d74 100644 --- a/integrations/github/tests/test_repo_viewer.py +++ b/integrations/github/tests/test_repo_viewer.py @@ -19,18 +19,18 @@ def test_init_default(self, monkeypatch): assert viewer.raise_on_failure is True assert viewer.max_file_size == 1_000_000 assert viewer.repo is None - assert viewer.branch is None + assert viewer.branch == "main" def test_init_with_parameters(self): token = Secret.from_token("test-token") viewer = GitHubRepoViewer( - github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" + github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="test-branch" ) assert viewer.github_token == token assert viewer.raise_on_failure is False assert viewer.max_file_size == 500_000 assert viewer.repo == "owner/repo" - assert viewer.branch == "main" + assert viewer.branch == "test-branch" with pytest.raises(TypeError): GitHubRepoViewer(github_token="not_a_secret") @@ -41,7 +41,7 @@ def test_to_dict(self, monkeypatch): token = Secret.from_env_var("ENV_VAR") viewer = GitHubRepoViewer( - github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="main" + github_token=token, raise_on_failure=False, max_file_size=500_000, repo="owner/repo", branch="test-branch" ) data = viewer.to_dict() @@ -53,7 +53,7 @@ def test_to_dict(self, monkeypatch): "raise_on_failure": False, "max_file_size": 500_000, "repo": "owner/repo", - "branch": "main", + "branch": "test-branch", }, } @@ -67,7 +67,7 @@ def test_from_dict(self, monkeypatch): "raise_on_failure": False, "max_file_size": 500_000, "repo": "owner/repo", - "branch": "main", + "branch": "test-branch", }, } @@ -77,7 +77,7 @@ def test_from_dict(self, monkeypatch): assert viewer.raise_on_failure is False assert 
viewer.max_file_size == 500_000 assert viewer.repo == "owner/repo" - assert viewer.branch == "main" + assert viewer.branch == "test-branch" @patch("requests.get") def test_run_file(self, mock_get, monkeypatch): From 9ae78570753abfc1f95ae8f70a6b57204309a363 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 6 May 2025 09:43:21 +0200 Subject: [PATCH 34/51] lint --- .../github/tests/test_file_editor_tool.py | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py index a20c4e2db9..4c60626b6e 100644 --- a/integrations/github/tests/test_file_editor_tool.py +++ b/integrations/github/tests/test_file_editor_tool.py @@ -89,17 +89,13 @@ def test_pipeline_serialization(self, monkeypatch): "api_base_url": None, "organization": None, "generation_kwargs": {}, - "api_key": { - "env_vars": ["OPENAI_API_KEY"], - "strict": True, - "type": "env_var" - }, + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, "timeout": None, "max_retries": None, "tools": None, "tools_strict": False, - "http_client_kwargs": None - } + "http_client_kwargs": None, + }, }, "tools": [ { @@ -108,15 +104,11 @@ def test_pipeline_serialization(self, monkeypatch): "name": "file_editor", "description": FILE_EDITOR_PROMPT, "parameters": FILE_EDITOR_SCHEMA, - "github_token": { - "env_vars": ["GITHUB_TOKEN"], - "strict": True, - "type": "env_var" - }, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, "repo": None, "branch": "main", "raise_on_failure": True, - } + }, } ], "system_prompt": None, @@ -124,12 +116,12 @@ def test_pipeline_serialization(self, monkeypatch): "state_schema": {}, "max_agent_steps": 100, "raise_on_tool_invocation_failure": False, - "streaming_callback": None - } + "streaming_callback": None, + }, } }, "connections": [], - "connection_type_validation": True + "connection_type_validation": True, } 
deserialized_pipeline = Pipeline.from_dict(pipeline_dict) From d0480c2c70bea8857f5f22cd0282a6a949b3ca65 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 6 May 2025 13:09:51 +0200 Subject: [PATCH 35/51] add four more tools --- .../prompts/github/issue_viewer.py | 25 ++++++ .../prompts/github/pr_system_prompt.py | 28 +++++++ .../tools/github/file_editor_tool.py | 2 +- .../tools/github/issue_commenter_tool.py | 76 +++++++++++++++++ .../tools/github/issue_viewer_tool.py | 76 +++++++++++++++++ .../tools/github/pr_creator_tool.py | 72 ++++++++++++++++ .../tools/github/repo_viewer_tool.py | 84 +++++++++++++++++++ 7 files changed, 362 insertions(+), 1 deletion(-) create mode 100644 integrations/github/src/haystack_integrations/prompts/github/issue_viewer.py create mode 100644 integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py create mode 100644 integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py create mode 100644 integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py create mode 100644 integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py diff --git a/integrations/github/src/haystack_integrations/prompts/github/issue_viewer.py b/integrations/github/src/haystack_integrations/prompts/github/issue_viewer.py new file mode 100644 index 0000000000..af01903c74 --- /dev/null +++ b/integrations/github/src/haystack_integrations/prompts/github/issue_viewer.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +ISSUE_VIEWER_PROMPT = """Haystack-Agent uses this tool to view a GitHub issue. +Haystack-Agent can view one issue at a time. + + +Pass an `issue_url` string for the GitHub issue that you want to view. +It is required to pass `issue_url` to use this tool. +The structure is "https://github.com/repo-owner/repo-name/issues/issue-number". 
+ +Examples: + +- {"issue_url": "https://github.com/deepset-ai/haystack/issues/9343"} + - will show you the issue 9343 of the haystack repository +- {"issue_url": "https://github.com/deepset-ai/haystack-core-integrations/issues/1685"} + - will show you the issue 1685 of the haystack-core-integrations repository + +""" + +ISSUE_VIEWER_SCHEMA = { + "properties": {"issue_url": {"type": "string", "description": "URL of the GitHub issue to view."}}, + "required": ["issue_url"], + "type": "object", +} diff --git a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py index 0acfba3a72..fd53c7fd21 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py @@ -53,3 +53,31 @@ Each description explains what changes were made and why they were necessary. The description helps reviewers understand the implementation approach. 
""" + +PR_SCHEMA = { + "properties": { + "issue_url": {"type": "string", "description": "URL of the GitHub issue to link the PR to."}, + "title": { + "type": "string", + "description": "Title of the pull request.", + }, + "branch": { + "type": "string", + "description": "Name of the branch in your fork where changes are implemented.", + }, + "base": { + "type": "string", + "description": "Name of the branch in the original repo you want to merge into.", + }, + "body": { + "type": "string", + "description": "Additional content for the pull request description.", + }, + "draft": { + "type": "boolean", + "description": "Whether to create a draft pull request.", + }, + }, + "required": ["issue_url", "title", "branch", "base"], + "type": "object", +} diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index 2548f038b8..3679170d68 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -13,7 +13,7 @@ class GitHubFileEditorTool(ComponentTool): """ - A Haystack tool for editing files in GitHub repositories. + A tool for editing files in GitHub repositories. 
""" def __init__( diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py new file mode 100644 index 0000000000..ef431c847b --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Dict, Optional + +from haystack import default_from_dict, default_to_dict +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.issue_commenter import GitHubIssueCommenter +from haystack_integrations.prompts.github.comment_tool import COMMENT_PROMPT, COMMENT_SCHEMA + + +class GitHubIssueCommenterTool(ComponentTool): + """ + A tool for commenting on GitHub issues. + """ + + def __init__( + self, + *, + name: Optional[str] = "issue_commenter", + description: Optional[str] = COMMENT_PROMPT, + parameters: Optional[Dict[str, Any]] = COMMENT_SCHEMA, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + retry_attempts: int = 2, + ): + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.retry_attempts = retry_attempts + + issue_commenter = GitHubIssueCommenter( + github_token=github_token, + raise_on_failure=raise_on_failure, + retry_attempts=retry_attempts, + ) + super().__init__( + component=issue_commenter, + name=name, + description=description, + parameters=parameters, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. 
+ """ + return default_to_dict( + self, + name=self.name, + description=self.description, + parameters=self.parameters, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + retry_attempts=self.retry_attempts, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenterTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. + """ + deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) + return default_from_dict(cls, data) diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py new file mode 100644 index 0000000000..3b7c795bf7 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Dict, Optional + +from haystack import default_from_dict, default_to_dict +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.issue_viewer import GitHubIssueViewer +from haystack_integrations.prompts.github.issue_viewer import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA + + +class GitHubIssueViewerTool(ComponentTool): + """ + A tool for viewing GitHub issues. 
+ """ + + def __init__( + self, + *, + name: Optional[str] = "issue_viewer", + description: Optional[str] = ISSUE_VIEWER_PROMPT, + parameters: Optional[Dict[str, Any]] = ISSUE_VIEWER_SCHEMA, + github_token: Optional[Secret] = None, + raise_on_failure: bool = True, + retry_attempts: int = 2, + ): + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.raise_on_failure = raise_on_failure + self.retry_attempts = retry_attempts + + issue_viewer = GitHubIssueViewer( + github_token=github_token, + raise_on_failure=raise_on_failure, + retry_attempts=retry_attempts, + ) + super().__init__( + component=issue_viewer, + name=name, + description=description, + parameters=parameters, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. + """ + return default_to_dict( + self, + name=self.name, + description=self.description, + parameters=self.parameters, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + retry_attempts=self.retry_attempts, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewerTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. 
+ """ + deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) + return default_from_dict(cls, data) diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py new file mode 100644 index 0000000000..0c09da7535 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -0,0 +1,72 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Dict, Optional + +from haystack import default_from_dict, default_to_dict +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.pr_creator import GitHubPRCreator +from haystack_integrations.prompts.github.pr_system_prompt import PR_SYSTEM_PROMPT, PR_SCHEMA + + +class GitHubPRCreatorTool(ComponentTool): + """ + A tool for creating pull requests in GitHub repositories. + """ + + def __init__( + self, + *, + name: Optional[str] = "pr_creator", + description: Optional[str] = PR_SYSTEM_PROMPT, + parameters: Optional[Dict[str, Any]] = PR_SCHEMA, + github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure: bool = True, + ): + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.raise_on_failure = raise_on_failure + + pr_creator = GitHubPRCreator( + github_token=github_token, + raise_on_failure=raise_on_failure, + ) + super().__init__( + component=pr_creator, + name=name, + description=description, + parameters=parameters, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. 
+ """ + return default_to_dict( + self, + name=self.name, + description=self.description, + parameters=self.parameters, + github_token=self.github_token.to_dict() if self.github_token else None, + raise_on_failure=self.raise_on_failure, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreatorTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. + """ + deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) + return default_from_dict(cls, data) diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py new file mode 100644 index 0000000000..045f39fa00 --- /dev/null +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Any, Dict, Optional + +from haystack import default_from_dict, default_to_dict +from haystack.tools import ComponentTool +from haystack.utils import Secret, deserialize_secrets_inplace + +from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer +from haystack_integrations.prompts.github.repo_viewer_tool import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA + + +class GitHubRepoViewerTool(ComponentTool): + """ + A tool for viewing files and directories in GitHub repositories. 
+ """ + + def __init__( + self, + *, + name: Optional[str] = "repo_viewer", + description: Optional[str] = REPO_VIEWER_PROMPT, + parameters: Optional[Dict[str, Any]] = REPO_VIEWER_SCHEMA, + github_token: Optional[Secret] = None, + repo: Optional[str] = None, + branch: str = "main", + raise_on_failure: bool = True, + max_file_size: int = 1_000_000, # 1MB default limit + ): + self.name = name + self.description = description + self.parameters = parameters + self.github_token = github_token + self.repo = repo + self.branch = branch + self.raise_on_failure = raise_on_failure + self.max_file_size = max_file_size + + repo_viewer = GitHubRepoViewer( + github_token=github_token, + repo=repo, + branch=branch, + raise_on_failure=raise_on_failure, + max_file_size=max_file_size, + ) + super().__init__( + component=repo_viewer, + name=name, + description=description, + parameters=parameters, + ) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the tool to a dictionary. + + :returns: + Dictionary with serialized data. + """ + return default_to_dict( + self, + name=self.name, + description=self.description, + parameters=self.parameters, + github_token=self.github_token.to_dict() if self.github_token else None, + repo=self.repo, + branch=self.branch, + raise_on_failure=self.raise_on_failure, + max_file_size=self.max_file_size, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": + """ + Deserializes the tool from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized tool. 
+ """ + deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) + return default_from_dict(cls, data) From ba9b1ea55d1885bb2a6034a3add95dfee2bb66de Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 6 May 2025 13:10:35 +0200 Subject: [PATCH 36/51] lint --- .../src/haystack_integrations/tools/github/pr_creator_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py index 0c09da7535..124764a9f3 100644 --- a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.pr_creator import GitHubPRCreator -from haystack_integrations.prompts.github.pr_system_prompt import PR_SYSTEM_PROMPT, PR_SCHEMA +from haystack_integrations.prompts.github.pr_system_prompt import PR_SCHEMA, PR_SYSTEM_PROMPT class GitHubPRCreatorTool(ComponentTool): From eb7a267060e45ebf92f5b1b55e5e9e9a9521cd2d Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 7 May 2025 08:45:50 +0200 Subject: [PATCH 37/51] rename prompts --- .../prompts/github/__init__.py | 19 ++++++++++--------- ...e_editor_tool.py => file_editor_prompt.py} | 0 ...ment_tool.py => issue_commenter_prompt.py} | 4 ++-- ...issue_viewer.py => issue_viewer_prompt.py} | 0 ..._system_prompt.py => pr_creator_prompt.py} | 4 ++-- ...o_viewer_tool.py => repo_viewer_prompt.py} | 0 .../prompts/github/system_prompt.py | 2 +- .../tools/github/__init__.py | 8 ++++++++ .../tools/github/file_editor_tool.py | 2 +- .../tools/github/issue_commenter_tool.py | 6 +++--- .../tools/github/issue_viewer_tool.py | 2 +- .../tools/github/pr_creator_tool.py | 6 +++--- .../tools/github/repo_viewer_tool.py | 2 +- 
.../github/tests/test_file_editor_tool.py | 2 +- 14 files changed, 33 insertions(+), 24 deletions(-) rename integrations/github/src/haystack_integrations/prompts/github/{file_editor_tool.py => file_editor_prompt.py} (100%) rename integrations/github/src/haystack_integrations/prompts/github/{comment_tool.py => issue_commenter_prompt.py} (81%) rename integrations/github/src/haystack_integrations/prompts/github/{issue_viewer.py => issue_viewer_prompt.py} (100%) rename integrations/github/src/haystack_integrations/prompts/github/{pr_system_prompt.py => pr_creator_prompt.py} (97%) rename integrations/github/src/haystack_integrations/prompts/github/{repo_viewer_tool.py => repo_viewer_prompt.py} (100%) diff --git a/integrations/github/src/haystack_integrations/prompts/github/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py index ecef89f8ae..834362aec3 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/__init__.py +++ b/integrations/github/src/haystack_integrations/prompts/github/__init__.py @@ -1,19 +1,20 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from .comment_tool import COMMENT_PROMPT, COMMENT_SCHEMA -from .file_editor_tool import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA -from .pr_system_prompt import PR_SYSTEM_PROMPT -from .repo_viewer_tool import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA -from .system_prompt import ISSUE_PROMPT +from .file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from .issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA +from .pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA +from .repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA +from .system_prompt import SYSTEM_PROMPT __all__ = [ - "COMMENT_PROMPT", - "COMMENT_SCHEMA", "FILE_EDITOR_PROMPT", "FILE_EDITOR_SCHEMA", - "ISSUE_PROMPT", - "PR_SYSTEM_PROMPT", + "ISSUE_COMMENTER_PROMPT", + "ISSUE_COMMENTER_SCHEMA", + 
"PR_CREATOR_PROMPT", + "PR_CREATOR_SCHEMA", "REPO_VIEWER_PROMPT", "REPO_VIEWER_SCHEMA", + "SYSTEM_PROMPT", ] diff --git a/integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/prompts/github/file_editor_prompt.py similarity index 100% rename from integrations/github/src/haystack_integrations/prompts/github/file_editor_tool.py rename to integrations/github/src/haystack_integrations/prompts/github/file_editor_prompt.py diff --git a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py b/integrations/github/src/haystack_integrations/prompts/github/issue_commenter_prompt.py similarity index 81% rename from integrations/github/src/haystack_integrations/prompts/github/comment_tool.py rename to integrations/github/src/haystack_integrations/prompts/github/issue_commenter_prompt.py index 7f4e2002d6..f35411293e 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/comment_tool.py +++ b/integrations/github/src/haystack_integrations/prompts/github/issue_commenter_prompt.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -COMMENT_PROMPT = """Haystack-Agent uses this tool to post a comment to a GitHub-issue discussion. +ISSUE_COMMENTER_PROMPT = """Haystack-Agent uses this tool to post a comment to a GitHub-issue discussion. Pass a `comment` string to post a comment. @@ -12,7 +12,7 @@ Haystack-Agent always passes the contents of the comment to the "comment" parameter when calling this tool. 
""" -COMMENT_SCHEMA = { +ISSUE_COMMENTER_SCHEMA = { "properties": { "comment": {"type": "string", "description": "The contents of the comment that you want to create."} }, diff --git a/integrations/github/src/haystack_integrations/prompts/github/issue_viewer.py b/integrations/github/src/haystack_integrations/prompts/github/issue_viewer_prompt.py similarity index 100% rename from integrations/github/src/haystack_integrations/prompts/github/issue_viewer.py rename to integrations/github/src/haystack_integrations/prompts/github/issue_viewer_prompt.py diff --git a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/pr_creator_prompt.py similarity index 97% rename from integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py rename to integrations/github/src/haystack_integrations/prompts/github/pr_creator_prompt.py index fd53c7fd21..d8df98e691 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/pr_system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/pr_creator_prompt.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -PR_SYSTEM_PROMPT = """The assistant is Haystack-Agent, created by deepset. +PR_CREATOR_PROMPT = """The assistant is Haystack-Agent, created by deepset. Haystack-Agent creates Pull Requests that resolve GitHub issues. Haystack-Agent receives a GitHub issue and all current comments. @@ -54,7 +54,7 @@ The description helps reviewers understand the implementation approach. 
""" -PR_SCHEMA = { +PR_CREATOR_SCHEMA = { "properties": { "issue_url": {"type": "string", "description": "URL of the GitHub issue to link the PR to."}, "title": { diff --git a/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/prompts/github/repo_viewer_prompt.py similarity index 100% rename from integrations/github/src/haystack_integrations/prompts/github/repo_viewer_tool.py rename to integrations/github/src/haystack_integrations/prompts/github/repo_viewer_prompt.py diff --git a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py index 95cda82e3d..cd48741508 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py +++ b/integrations/github/src/haystack_integrations/prompts/github/system_prompt.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -ISSUE_PROMPT = """The assistant is Haystack-Agent, created by deepset. +SYSTEM_PROMPT = """The assistant is Haystack-Agent, created by deepset. Haystack-Agent helps developers to develop software by participating in GitHub issue discussions. Haystack-Agent receives a GitHub issue and all current comments. 
diff --git a/integrations/github/src/haystack_integrations/tools/github/__init__.py b/integrations/github/src/haystack_integrations/tools/github/__init__.py index 81747ff39c..fabeb6acd6 100644 --- a/integrations/github/src/haystack_integrations/tools/github/__init__.py +++ b/integrations/github/src/haystack_integrations/tools/github/__init__.py @@ -2,7 +2,15 @@ # # SPDX-License-Identifier: Apache-2.0 from .file_editor_tool import GitHubFileEditorTool +from .issue_commenter_tool import GitHubIssueCommenterTool +from .issue_viewer_tool import GitHubIssueViewerTool +from .pr_creator_tool import GitHubPRCreatorTool +from .repo_viewer_tool import GitHubRepoViewerTool __all__ = [ "GitHubFileEditorTool", + "GitHubIssueCommenterTool", + "GitHubIssueViewerTool", + "GitHubPRCreatorTool", + "GitHubRepoViewerTool", ] diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index 3679170d68..9b54105726 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.file_editor import GitHubFileEditor -from haystack_integrations.prompts.github.file_editor_tool import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from haystack_integrations.prompts.github.file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA class GitHubFileEditorTool(ComponentTool): diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py index ef431c847b..58ac9dc47b 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py +++ 
b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.issue_commenter import GitHubIssueCommenter -from haystack_integrations.prompts.github.comment_tool import COMMENT_PROMPT, COMMENT_SCHEMA +from haystack_integrations.prompts.github.issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA class GitHubIssueCommenterTool(ComponentTool): @@ -20,8 +20,8 @@ def __init__( self, *, name: Optional[str] = "issue_commenter", - description: Optional[str] = COMMENT_PROMPT, - parameters: Optional[Dict[str, Any]] = COMMENT_SCHEMA, + description: Optional[str] = ISSUE_COMMENTER_PROMPT, + parameters: Optional[Dict[str, Any]] = ISSUE_COMMENTER_SCHEMA, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, retry_attempts: int = 2, diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py index 3b7c795bf7..738399031a 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.issue_viewer import GitHubIssueViewer -from haystack_integrations.prompts.github.issue_viewer import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA +from haystack_integrations.prompts.github.issue_viewer_prompt import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA class GitHubIssueViewerTool(ComponentTool): diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py index 124764a9f3..de5a0685ae 100644 --- 
a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.pr_creator import GitHubPRCreator -from haystack_integrations.prompts.github.pr_system_prompt import PR_SCHEMA, PR_SYSTEM_PROMPT +from haystack_integrations.prompts.github.pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA class GitHubPRCreatorTool(ComponentTool): @@ -20,8 +20,8 @@ def __init__( self, *, name: Optional[str] = "pr_creator", - description: Optional[str] = PR_SYSTEM_PROMPT, - parameters: Optional[Dict[str, Any]] = PR_SCHEMA, + description: Optional[str] = PR_CREATOR_PROMPT, + parameters: Optional[Dict[str, Any]] = PR_CREATOR_SCHEMA, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, ): diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index 045f39fa00..712ece19a6 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -8,7 +8,7 @@ from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer -from haystack_integrations.prompts.github.repo_viewer_tool import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA +from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA class GitHubRepoViewerTool(ComponentTool): diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py index 4c60626b6e..72614cfa42 100644 --- a/integrations/github/tests/test_file_editor_tool.py +++ 
b/integrations/github/tests/test_file_editor_tool.py @@ -6,7 +6,7 @@ from haystack.components.generators.chat import OpenAIChatGenerator from haystack.utils import Secret -from haystack_integrations.prompts.github.file_editor_tool import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from haystack_integrations.prompts.github.file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA from haystack_integrations.tools.github.file_editor_tool import GitHubFileEditorTool From d98d3ec33adde5b544c1eb4c2ca51ff88e264554 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 7 May 2025 08:56:45 +0200 Subject: [PATCH 38/51] add tests for four more tools --- .../github/tests/test_issue_commenter_tool.py | 54 +++++++++++++++++++ .../github/tests/test_issue_viewer_tool.py | 48 +++++++++++++++++ .../github/tests/test_pr_creator_tool.py | 50 +++++++++++++++++ .../github/tests/test_repo_viewer_tool.py | 54 +++++++++++++++++++ 4 files changed, 206 insertions(+) create mode 100644 integrations/github/tests/test_issue_commenter_tool.py create mode 100644 integrations/github/tests/test_issue_viewer_tool.py create mode 100644 integrations/github/tests/test_pr_creator_tool.py create mode 100644 integrations/github/tests/test_repo_viewer_tool.py diff --git a/integrations/github/tests/test_issue_commenter_tool.py b/integrations/github/tests/test_issue_commenter_tool.py new file mode 100644 index 0000000000..32e4eaf0c8 --- /dev/null +++ b/integrations/github/tests/test_issue_commenter_tool.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack.utils import Secret + +from haystack_integrations.prompts.github.issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA +from haystack_integrations.tools.github.issue_commenter_tool import GitHubIssueCommenterTool + + +class TestGitHubIssueCommenterTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = 
GitHubIssueCommenterTool() + assert tool.name == "issue_commenter" + assert tool.description == ISSUE_COMMENTER_PROMPT + assert tool.parameters == ISSUE_COMMENTER_SCHEMA + assert tool.retry_attempts == 2 + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool", + "init_parameters": { + "name": "issue_commenter", + "description": ISSUE_COMMENTER_PROMPT, + "parameters": ISSUE_COMMENTER_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "raise_on_failure": True, + "retry_attempts": 2, + }, + } + tool = GitHubIssueCommenterTool.from_dict(tool_dict) + assert tool.name == "issue_commenter" + assert tool.description == ISSUE_COMMENTER_PROMPT + assert tool.parameters == ISSUE_COMMENTER_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure + assert tool.retry_attempts == 2 + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueCommenterTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool" + assert tool_dict["init_parameters"]["name"] == "issue_commenter" + assert tool_dict["init_parameters"]["description"] == ISSUE_COMMENTER_PROMPT + assert tool_dict["init_parameters"]["parameters"] == ISSUE_COMMENTER_SCHEMA + assert tool_dict["init_parameters"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["init_parameters"]["raise_on_failure"] + assert tool_dict["init_parameters"]["retry_attempts"] == 2 diff --git a/integrations/github/tests/test_issue_viewer_tool.py b/integrations/github/tests/test_issue_viewer_tool.py new file mode 100644 index 0000000000..ba343457c8 --- /dev/null +++ 
b/integrations/github/tests/test_issue_viewer_tool.py @@ -0,0 +1,48 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack_integrations.prompts.github.issue_viewer_prompt import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA +from haystack_integrations.tools.github.issue_viewer_tool import GitHubIssueViewerTool + + +class TestGitHubIssueViewerTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueViewerTool() + assert tool.name == "issue_viewer" + assert tool.description == ISSUE_VIEWER_PROMPT + assert tool.parameters == ISSUE_VIEWER_SCHEMA + assert tool.retry_attempts == 2 + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool", + "init_parameters": { + "name": "issue_viewer", + "description": ISSUE_VIEWER_PROMPT, + "parameters": ISSUE_VIEWER_SCHEMA, + "github_token": None, + "raise_on_failure": True, + "retry_attempts": 2, + }, + } + tool = GitHubIssueViewerTool.from_dict(tool_dict) + assert tool.name == "issue_viewer" + assert tool.description == ISSUE_VIEWER_PROMPT + assert tool.parameters == ISSUE_VIEWER_SCHEMA + assert tool.github_token is None + assert tool.raise_on_failure + assert tool.retry_attempts == 2 + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueViewerTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool" + assert tool_dict["init_parameters"]["name"] == "issue_viewer" + assert tool_dict["init_parameters"]["description"] == ISSUE_VIEWER_PROMPT + assert tool_dict["init_parameters"]["parameters"] == ISSUE_VIEWER_SCHEMA + assert tool_dict["init_parameters"]["github_token"] is None + assert tool_dict["init_parameters"]["raise_on_failure"] + assert 
tool_dict["init_parameters"]["retry_attempts"] == 2 diff --git a/integrations/github/tests/test_pr_creator_tool.py b/integrations/github/tests/test_pr_creator_tool.py new file mode 100644 index 0000000000..35db78004e --- /dev/null +++ b/integrations/github/tests/test_pr_creator_tool.py @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack.utils import Secret + +from haystack_integrations.prompts.github.pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA +from haystack_integrations.tools.github.pr_creator_tool import GitHubPRCreatorTool + + +class TestGitHubPRCreatorTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubPRCreatorTool() + assert tool.name == "pr_creator" + assert tool.description == PR_CREATOR_PROMPT + assert tool.parameters == PR_CREATOR_SCHEMA + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool", + "init_parameters": { + "name": "pr_creator", + "description": PR_CREATOR_PROMPT, + "parameters": PR_CREATOR_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "raise_on_failure": True, + }, + } + tool = GitHubPRCreatorTool.from_dict(tool_dict) + assert tool.name == "pr_creator" + assert tool.description == PR_CREATOR_PROMPT + assert tool.parameters == PR_CREATOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubPRCreatorTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool" + assert tool_dict["init_parameters"]["name"] == "pr_creator" + assert tool_dict["init_parameters"]["description"] == 
PR_CREATOR_PROMPT + assert tool_dict["init_parameters"]["parameters"] == PR_CREATOR_SCHEMA + assert tool_dict["init_parameters"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["init_parameters"]["raise_on_failure"] diff --git a/integrations/github/tests/test_repo_viewer_tool.py b/integrations/github/tests/test_repo_viewer_tool.py new file mode 100644 index 0000000000..47bc2b64c4 --- /dev/null +++ b/integrations/github/tests/test_repo_viewer_tool.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA +from haystack_integrations.tools.github.repo_viewer_tool import GitHubRepoViewerTool + + +class TestGitHubRepoViewerTool: + def test_init(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubRepoViewerTool() + assert tool.name == "repo_viewer" + assert tool.description == REPO_VIEWER_PROMPT + assert tool.parameters == REPO_VIEWER_SCHEMA + assert tool.max_file_size == 1_000_000 + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", + "init_parameters": { + "name": "repo_viewer", + "description": REPO_VIEWER_PROMPT, + "parameters": REPO_VIEWER_SCHEMA, + "github_token": None, + "repo": None, + "branch": "main", + "raise_on_failure": True, + "max_file_size": 1_000_000, + }, + } + tool = GitHubRepoViewerTool.from_dict(tool_dict) + assert tool.name == "repo_viewer" + assert tool.description == REPO_VIEWER_PROMPT + assert tool.parameters == REPO_VIEWER_SCHEMA + assert tool.github_token is None + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure + assert tool.max_file_size == 1_000_000 + + def test_to_dict(self, monkeypatch): + 
monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubRepoViewerTool() + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool" + assert tool_dict["init_parameters"]["name"] == "repo_viewer" + assert tool_dict["init_parameters"]["description"] == REPO_VIEWER_PROMPT + assert tool_dict["init_parameters"]["parameters"] == REPO_VIEWER_SCHEMA + assert tool_dict["init_parameters"]["github_token"] is None + assert tool_dict["init_parameters"]["repo"] is None + assert tool_dict["init_parameters"]["branch"] == "main" + assert tool_dict["init_parameters"]["raise_on_failure"] + assert tool_dict["init_parameters"]["max_file_size"] == 1_000_000 From b8bb2ed382a7fd5fb131e21e422d46ea4972de15 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 09:59:14 +0200 Subject: [PATCH 39/51] rename context prompt --- .../github/src/haystack_integrations/prompts/github/__init__.py | 2 ++ .../prompts/github/{context.py => context_prompt.py} | 0 2 files changed, 2 insertions(+) rename integrations/github/src/haystack_integrations/prompts/github/{context.py => context_prompt.py} (100%) diff --git a/integrations/github/src/haystack_integrations/prompts/github/__init__.py b/integrations/github/src/haystack_integrations/prompts/github/__init__.py index 834362aec3..b13f31c239 100644 --- a/integrations/github/src/haystack_integrations/prompts/github/__init__.py +++ b/integrations/github/src/haystack_integrations/prompts/github/__init__.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +from .context_prompt import CONTEXT_PROMPT from .file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA from .issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA from .pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA @@ -8,6 +9,7 @@ from .system_prompt import SYSTEM_PROMPT __all__ = [ + "CONTEXT_PROMPT", 
"FILE_EDITOR_PROMPT", "FILE_EDITOR_SCHEMA", "ISSUE_COMMENTER_PROMPT", diff --git a/integrations/github/src/haystack_integrations/prompts/github/context.py b/integrations/github/src/haystack_integrations/prompts/github/context_prompt.py similarity index 100% rename from integrations/github/src/haystack_integrations/prompts/github/context.py rename to integrations/github/src/haystack_integrations/prompts/github/context_prompt.py From 8635fa50a7c683b3573604631b412285c74b9d6b Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 10:14:42 +0200 Subject: [PATCH 40/51] add outputs_to_state as param to GitHubFileEditorTool --- .../connectors/github/file_editor.py | 2 + .../tools/github/file_editor_tool.py | 82 ++++++++++++++++++- .../github/tests/test_file_editor_tool.py | 59 +++++++++++++ 3 files changed, 141 insertions(+), 2 deletions(-) diff --git a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py index cee4939cdf..2f543b7b72 100644 --- a/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py +++ b/integrations/github/src/haystack_integrations/components/connectors/github/file_editor.py @@ -90,6 +90,8 @@ def __init__( :param repo: Default repository in owner/repo format :param branch: Default branch to work with :param raise_on_failure: If True, raises exceptions on API errors + + :raises TypeError: If github_token is not a Secret """ if not isinstance(github_token, Secret): error_message = "github_token must be a Secret" diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index 9b54105726..8216d10029 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ 
b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -1,11 +1,12 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Callable, Dict, Optional, Union from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace +from haystack.utils.callable_serialization import deserialize_callable, serialize_callable from haystack_integrations.components.connectors.github.file_editor import GitHubFileEditor from haystack_integrations.prompts.github.file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA @@ -14,6 +15,34 @@ class GitHubFileEditorTool(ComponentTool): """ A tool for editing files in GitHub repositories. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param repo: Default repository in owner/repo format + :param branch: Default branch to work with + :param raise_on_failure: If True, raises exceptions on API errors + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. 
+ If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. + Example: { + "documents": {"handler": custom_handler} + } """ def __init__( @@ -26,6 +55,9 @@ def __init__( repo: Optional[str] = None, branch: str = "main", raise_on_failure: bool = True, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): self.name = name self.description = description @@ -34,6 +66,9 @@ def __init__( self.repo = repo self.branch = branch self.raise_on_failure = raise_on_failure + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state file_editor = GitHubFileEditor( github_token=github_token, @@ -46,6 +81,9 @@ def __init__( name=name, description=description, parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, ) def to_dict(self) -> Dict[str, Any]: @@ -55,7 +93,7 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. 
""" - return default_to_dict( + serialized = default_to_dict( self, name=self.name, description=self.description, @@ -64,8 +102,29 @@ def to_dict(self) -> Dict[str, Any]: repo=self.repo, branch=self.branch, raise_on_failure=self.raise_on_failure, + outputs_to_string=self.outputs_to_string, + inputs_from_state=self.inputs_from_state, + outputs_to_state=self.outputs_to_state, ) + # Handle serialization of callable handlers based on the code in ComponentTool.to_dict + if self.outputs_to_state is not None: + serialized_outputs = {} + for key, config in self.outputs_to_state.items(): + serialized_config = config.copy() + if "handler" in config: + serialized_config["handler"] = serialize_callable(config["handler"]) + serialized_outputs[key] = serialized_config + serialized["init_parameters"]["outputs_to_state"] = serialized_outputs + + if self.outputs_to_string is not None and self.outputs_to_string.get("handler") is not None: + serialized["init_parameters"]["outputs_to_string"] = { + **self.outputs_to_string, + "handler": serialize_callable(self.outputs_to_string["handler"]), + } + + return serialized + @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": """ @@ -77,4 +136,23 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": Deserialized tool. 
""" deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) + + # Handle deserialization of callable handlers based on the code in ComponentTool.from_dict + if "outputs_to_state" in data["init_parameters"] and data["init_parameters"]["outputs_to_state"]: + deserialized_outputs = {} + for key, config in data["init_parameters"]["outputs_to_state"].items(): + deserialized_config = config.copy() + if "handler" in config: + deserialized_config["handler"] = deserialize_callable(config["handler"]) + deserialized_outputs[key] = deserialized_config + data["init_parameters"]["outputs_to_state"] = deserialized_outputs + + if ( + data["init_parameters"].get("outputs_to_string") is not None + and data["init_parameters"]["outputs_to_string"].get("handler") is not None + ): + data["init_parameters"]["outputs_to_string"]["handler"] = deserialize_callable( + data["init_parameters"]["outputs_to_string"]["handler"] + ) + return default_from_dict(cls, data) diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py index 72614cfa42..26886115e8 100644 --- a/integrations/github/tests/test_file_editor_tool.py +++ b/integrations/github/tests/test_file_editor_tool.py @@ -10,6 +10,15 @@ from haystack_integrations.tools.github.file_editor_tool import GitHubFileEditorTool +def custom_handler(value): + """A test handler function for serialization tests.""" + return f"Processed: {value}" + + +# Make the handler available at module level +__all__ = ["custom_handler"] + + class TestGitHubFileEditorTool: def test_init(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") @@ -58,6 +67,53 @@ def test_to_dict(self, monkeypatch): assert tool_dict["init_parameters"]["branch"] == "main" assert tool_dict["init_parameters"]["raise_on_failure"] + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + tool = GitHubFileEditorTool( + outputs_to_string={"source": 
"result", "handler": custom_handler}, + inputs_from_state={"repo_state": "repo"}, + outputs_to_state={"file_content": {"source": "content", "handler": custom_handler}}, + ) + + tool_dict = tool.to_dict() + assert tool_dict["init_parameters"]["outputs_to_string"] == { + "source": "result", + "handler": "tests.test_file_editor_tool.custom_handler", + } + assert tool_dict["init_parameters"]["inputs_from_state"] == {"repo_state": "repo"} + assert tool_dict["init_parameters"]["outputs_to_state"] == { + "file_content": {"source": "content", "handler": "tests.test_file_editor_tool.custom_handler"} + } + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + tool_dict = { + "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", + "init_parameters": { + "name": "file_editor", + "description": FILE_EDITOR_PROMPT, + "parameters": FILE_EDITOR_SCHEMA, + "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "repo": None, + "branch": "main", + "raise_on_failure": True, + "outputs_to_string": {"source": "result", "handler": "tests.test_file_editor_tool.custom_handler"}, + "inputs_from_state": {"repo_state": "repo"}, + "outputs_to_state": { + "file_content": {"source": "content", "handler": "tests.test_file_editor_tool.custom_handler"} + }, + }, + } + + tool = GitHubFileEditorTool.from_dict(tool_dict) + assert tool.outputs_to_string["source"] == "result" + assert tool.outputs_to_string["handler"] == custom_handler + assert tool.inputs_from_state == {"repo_state": "repo"} + assert tool.outputs_to_state["file_content"]["source"] == "content" + assert tool.outputs_to_state["file_content"]["handler"] == custom_handler + def test_pipeline_serialization(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") monkeypatch.setenv("OPENAI_API_KEY", "test-token") @@ -108,6 +164,9 @@ def test_pipeline_serialization(self, monkeypatch): "repo": None, "branch": 
"main", "raise_on_failure": True, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, }, } ], From 91ac951893b150f59c4d48438cf699a070f5a87b Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 10:25:14 +0200 Subject: [PATCH 41/51] add outputs_to_state as param to GitHubRepoViewerTool --- .../tools/github/repo_viewer_tool.py | 83 ++++++++++++++++++- .../github/tests/test_repo_viewer_tool.py | 57 +++++++++++++ 2 files changed, 138 insertions(+), 2 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index 712ece19a6..11c4e5202f 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -1,11 +1,12 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Callable, Dict, Optional, Union from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace +from haystack.utils.callable_serialization import deserialize_callable, serialize_callable from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA @@ -14,6 +15,35 @@ class GitHubRepoViewerTool(ComponentTool): """ A tool for viewing files and directories in GitHub repositories. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. 
+ :param github_token: GitHub personal access token for API authentication + :param repo: Default repository in owner/repo format + :param branch: Default branch to work with + :param raise_on_failure: If True, raises exceptions on API errors + :param max_file_size: Maximum file size in bytes to read + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } """ def __init__( @@ -27,6 +57,9 @@ def __init__( branch: str = "main", raise_on_failure: bool = True, max_file_size: int = 1_000_000, # 1MB default limit + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): self.name = name self.description = description @@ -36,6 +69,9 @@ def __init__( self.branch = branch self.raise_on_failure = raise_on_failure self.max_file_size = max_file_size + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state repo_viewer = GitHubRepoViewer( github_token=github_token, @@ -49,6 +85,9 @@ def __init__( name=name, description=description, parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, ) def to_dict(self) -> Dict[str, Any]: @@ -58,7 +97,7 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. 
""" - return default_to_dict( + serialized = default_to_dict( self, name=self.name, description=self.description, @@ -68,8 +107,29 @@ def to_dict(self) -> Dict[str, Any]: branch=self.branch, raise_on_failure=self.raise_on_failure, max_file_size=self.max_file_size, + outputs_to_string=self.outputs_to_string, + inputs_from_state=self.inputs_from_state, + outputs_to_state=self.outputs_to_state, ) + # Handle serialization of callable handlers based on the code in ComponentTool.to_dict + if self.outputs_to_state is not None: + serialized_outputs = {} + for key, config in self.outputs_to_state.items(): + serialized_config = config.copy() + if "handler" in config: + serialized_config["handler"] = serialize_callable(config["handler"]) + serialized_outputs[key] = serialized_config + serialized["init_parameters"]["outputs_to_state"] = serialized_outputs + + if self.outputs_to_string is not None and self.outputs_to_string.get("handler") is not None: + serialized["init_parameters"]["outputs_to_string"] = { + **self.outputs_to_string, + "handler": serialize_callable(self.outputs_to_string["handler"]), + } + + return serialized + @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": """ @@ -81,4 +141,23 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": Deserialized tool. 
""" deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) + + # Handle deserialization of callable handlers based on the code in ComponentTool.from_dict + if "outputs_to_state" in data["init_parameters"] and data["init_parameters"]["outputs_to_state"]: + deserialized_outputs = {} + for key, config in data["init_parameters"]["outputs_to_state"].items(): + deserialized_config = config.copy() + if "handler" in config: + deserialized_config["handler"] = deserialize_callable(config["handler"]) + deserialized_outputs[key] = deserialized_config + data["init_parameters"]["outputs_to_state"] = deserialized_outputs + + if ( + data["init_parameters"].get("outputs_to_string") is not None + and data["init_parameters"]["outputs_to_string"].get("handler") is not None + ): + data["init_parameters"]["outputs_to_string"]["handler"] = deserialize_callable( + data["init_parameters"]["outputs_to_string"]["handler"] + ) + return default_from_dict(cls, data) diff --git a/integrations/github/tests/test_repo_viewer_tool.py b/integrations/github/tests/test_repo_viewer_tool.py index 47bc2b64c4..a3e7544250 100644 --- a/integrations/github/tests/test_repo_viewer_tool.py +++ b/integrations/github/tests/test_repo_viewer_tool.py @@ -5,6 +5,15 @@ from haystack_integrations.tools.github.repo_viewer_tool import GitHubRepoViewerTool +def custom_handler(value): + """A test handler function for serialization tests.""" + return f"Processed: {value}" + + +# Make the handler available at module level +__all__ = ["custom_handler"] + + class TestGitHubRepoViewerTool: def test_init(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") @@ -52,3 +61,51 @@ def test_to_dict(self, monkeypatch): assert tool_dict["init_parameters"]["branch"] == "main" assert tool_dict["init_parameters"]["raise_on_failure"] assert tool_dict["init_parameters"]["max_file_size"] == 1_000_000 + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") 
+ + tool = GitHubRepoViewerTool( + outputs_to_string={"source": "result", "handler": custom_handler}, + inputs_from_state={"repo_state": "repo"}, + outputs_to_state={"file_content": {"source": "content", "handler": custom_handler}}, + ) + + tool_dict = tool.to_dict() + assert tool_dict["init_parameters"]["outputs_to_string"] == { + "source": "result", + "handler": "tests.test_repo_viewer_tool.custom_handler", + } + assert tool_dict["init_parameters"]["inputs_from_state"] == {"repo_state": "repo"} + assert tool_dict["init_parameters"]["outputs_to_state"] == { + "file_content": {"source": "content", "handler": "tests.test_repo_viewer_tool.custom_handler"} + } + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + + tool_dict = { + "type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", + "init_parameters": { + "name": "repo_viewer", + "description": REPO_VIEWER_PROMPT, + "parameters": REPO_VIEWER_SCHEMA, + "github_token": None, + "repo": None, + "branch": "main", + "raise_on_failure": True, + "max_file_size": 1_000_000, + "outputs_to_string": {"source": "result", "handler": "tests.test_repo_viewer_tool.custom_handler"}, + "inputs_from_state": {"repo_state": "repo"}, + "outputs_to_state": { + "file_content": {"source": "content", "handler": "tests.test_repo_viewer_tool.custom_handler"} + }, + }, + } + + tool = GitHubRepoViewerTool.from_dict(tool_dict) + assert tool.outputs_to_string["source"] == "result" + assert tool.outputs_to_string["handler"] == custom_handler + assert tool.inputs_from_state == {"repo_state": "repo"} + assert tool.outputs_to_state["file_content"]["source"] == "content" + assert tool.outputs_to_state["file_content"]["handler"] == custom_handler From 560abfd4bd149375cd15a7749ba36f86425828e6 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 10:43:03 +0200 Subject: [PATCH 42/51] set default outputs_to_state for GitHubRepoViewerTool --- 
.../tools/github/repo_viewer_tool.py | 10 +++++--- .../tools/github/utils.py | 25 +++++++++++++++++++ .../github/tests/test_repo_viewer_tool.py | 20 +++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 integrations/github/src/haystack_integrations/tools/github/utils.py diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index 11c4e5202f..16d05e05fa 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -10,6 +10,7 @@ from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA +from haystack_integrations.tools.github.utils import message_handler class GitHubRepoViewerTool(ComponentTool): @@ -57,9 +58,12 @@ def __init__( branch: str = "main", raise_on_failure: bool = True, max_file_size: int = 1_000_000, # 1MB default limit - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, - inputs_from_state: Optional[Dict[str, str]] = None, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = { + "source": "documents", + "handler": message_handler, + }, + inputs_from_state: Optional[Dict[str, str]] = {}, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = {"documents": {"source": "documents"}}, ): self.name = name self.description = description diff --git a/integrations/github/src/haystack_integrations/tools/github/utils.py b/integrations/github/src/haystack_integrations/tools/github/utils.py new file mode 100644 index 0000000000..356654f9c2 --- /dev/null +++ 
b/integrations/github/src/haystack_integrations/tools/github/utils.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import List + +from haystack import Document + + +def message_handler(documents: List[Document]) -> str: + """ + Handles the tool output before conversion to ChatMessage. + """ + result_str = "" + for document in documents: + if document.meta["type"] in ["file", "dir", "error"]: + result_str += document.content + "\n" + else: + result_str += f"File Content for {document.meta['path']}\n\n" + result_str += document.content + + if len(result_str) > 150_000: + result_str = result_str[:150_000] + "...(large file can't be fully displayed)" + + return result_str diff --git a/integrations/github/tests/test_repo_viewer_tool.py b/integrations/github/tests/test_repo_viewer_tool.py index a3e7544250..05bd9108d9 100644 --- a/integrations/github/tests/test_repo_viewer_tool.py +++ b/integrations/github/tests/test_repo_viewer_tool.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA from haystack_integrations.tools.github.repo_viewer_tool import GitHubRepoViewerTool +from haystack_integrations.tools.github.utils import message_handler def custom_handler(value): @@ -22,6 +23,9 @@ def test_init(self, monkeypatch): assert tool.description == REPO_VIEWER_PROMPT assert tool.parameters == REPO_VIEWER_SCHEMA assert tool.max_file_size == 1_000_000 + assert tool.outputs_to_string == {"source": "documents", "handler": message_handler} + assert tool.inputs_from_state == {} + assert tool.outputs_to_state == {"documents": {"source": "documents"}} def test_from_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") @@ -36,6 +40,12 @@ def test_from_dict(self, monkeypatch): "branch": "main", "raise_on_failure": True, "max_file_size": 1_000_000, + "outputs_to_string": { + 
"source": "documents", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, + "inputs_from_state": {}, + "outputs_to_state": {"documents": {"source": "documents"}}, }, } tool = GitHubRepoViewerTool.from_dict(tool_dict) @@ -47,6 +57,10 @@ def test_from_dict(self, monkeypatch): assert tool.branch == "main" assert tool.raise_on_failure assert tool.max_file_size == 1_000_000 + assert tool.outputs_to_string["source"] == "documents" + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {} + assert tool.outputs_to_state == {"documents": {"source": "documents"}} def test_to_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") @@ -61,6 +75,12 @@ def test_to_dict(self, monkeypatch): assert tool_dict["init_parameters"]["branch"] == "main" assert tool_dict["init_parameters"]["raise_on_failure"] assert tool_dict["init_parameters"]["max_file_size"] == 1_000_000 + assert tool_dict["init_parameters"]["outputs_to_string"] == { + "source": "documents", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + assert tool_dict["init_parameters"]["inputs_from_state"] == {} + assert tool_dict["init_parameters"]["outputs_to_state"] == {"documents": {"source": "documents"}} def test_to_dict_with_extra_params(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") From d754b016b5031d91ffee4f0f25c945d800864b18 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 11:18:57 +0200 Subject: [PATCH 43/51] extract serialize_handlers to utils; don't use mutable defaults --- .../tools/github/file_editor_tool.py | 39 +---------- .../tools/github/repo_viewer_tool.py | 66 +++++-------------- .../tools/github/utils.py | 66 +++++++++++++++++-- 3 files changed, 82 insertions(+), 89 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index 
8216d10029..40b9184fb0 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -6,10 +6,10 @@ from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace -from haystack.utils.callable_serialization import deserialize_callable, serialize_callable from haystack_integrations.components.connectors.github.file_editor import GitHubFileEditor from haystack_integrations.prompts.github.file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers class GitHubFileEditorTool(ComponentTool): @@ -107,22 +107,7 @@ def to_dict(self) -> Dict[str, Any]: outputs_to_state=self.outputs_to_state, ) - # Handle serialization of callable handlers based on the code in ComponentTool.to_dict - if self.outputs_to_state is not None: - serialized_outputs = {} - for key, config in self.outputs_to_state.items(): - serialized_config = config.copy() - if "handler" in config: - serialized_config["handler"] = serialize_callable(config["handler"]) - serialized_outputs[key] = serialized_config - serialized["init_parameters"]["outputs_to_state"] = serialized_outputs - - if self.outputs_to_string is not None and self.outputs_to_string.get("handler") is not None: - serialized["init_parameters"]["outputs_to_string"] = { - **self.outputs_to_string, - "handler": serialize_callable(self.outputs_to_string["handler"]), - } - + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) return serialized @classmethod @@ -136,23 +121,5 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": Deserialized tool. 
""" deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) - - # Handle deserialization of callable handlers based on the code in ComponentTool.from_dict - if "outputs_to_state" in data["init_parameters"] and data["init_parameters"]["outputs_to_state"]: - deserialized_outputs = {} - for key, config in data["init_parameters"]["outputs_to_state"].items(): - deserialized_config = config.copy() - if "handler" in config: - deserialized_config["handler"] = deserialize_callable(config["handler"]) - deserialized_outputs[key] = deserialized_config - data["init_parameters"]["outputs_to_state"] = deserialized_outputs - - if ( - data["init_parameters"].get("outputs_to_string") is not None - and data["init_parameters"]["outputs_to_string"].get("handler") is not None - ): - data["init_parameters"]["outputs_to_string"]["handler"] = deserialize_callable( - data["init_parameters"]["outputs_to_string"]["handler"] - ) - + deserialize_handlers(data) return default_from_dict(cls, data) diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index 16d05e05fa..fa7c93ae9d 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -6,11 +6,10 @@ from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace -from haystack.utils.callable_serialization import deserialize_callable, serialize_callable from haystack_integrations.components.connectors.github.repo_viewer import GitHubRepoViewer from haystack_integrations.prompts.github.repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA -from haystack_integrations.tools.github.utils import message_handler +from haystack_integrations.tools.github.utils import deserialize_handlers, 
message_handler, serialize_handlers class GitHubRepoViewerTool(ComponentTool): @@ -27,6 +26,7 @@ class GitHubRepoViewerTool(ComponentTool): :param max_file_size: Maximum file size in bytes to read :param outputs_to_string: Optional dictionary defining how a tool outputs should be converted into a string. + By default, truncates the document.content of the viewed files to 150,000 characters each. If the source is provided only the specified output key is sent to the handler. If the source is omitted the whole tool result is sent to the handler. Example: { @@ -34,9 +34,11 @@ class GitHubRepoViewerTool(ComponentTool): } :param inputs_from_state: Optional dictionary mapping state keys to tool parameter names. + By default, the tool does not use any inputs from state. Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. :param outputs_to_state: Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + By default, outputs the viewed files as documents to the state. If the source is provided only the specified output key is sent to the handler. 
Example: { "documents": {"source": "docs", "handler": custom_handler} @@ -58,12 +60,9 @@ def __init__( branch: str = "main", raise_on_failure: bool = True, max_file_size: int = 1_000_000, # 1MB default limit - outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = { - "source": "documents", - "handler": message_handler, - }, - inputs_from_state: Optional[Dict[str, str]] = {}, - outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = {"documents": {"source": "documents"}}, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): self.name = name self.description = description @@ -73,9 +72,11 @@ def __init__( self.branch = branch self.raise_on_failure = raise_on_failure self.max_file_size = max_file_size - self.outputs_to_string = outputs_to_string - self.inputs_from_state = inputs_from_state - self.outputs_to_state = outputs_to_state + + # Set default values for mutable parameters + self.outputs_to_string = outputs_to_string or {"source": "documents", "handler": message_handler} + self.inputs_from_state = inputs_from_state or {} + self.outputs_to_state = outputs_to_state or {"documents": {"source": "documents"}} repo_viewer = GitHubRepoViewer( github_token=github_token, @@ -89,9 +90,9 @@ def __init__( name=name, description=description, parameters=parameters, - outputs_to_string=outputs_to_string, - inputs_from_state=inputs_from_state, - outputs_to_state=outputs_to_state, + outputs_to_string=self.outputs_to_string, + inputs_from_state=self.inputs_from_state, + outputs_to_state=self.outputs_to_state, ) def to_dict(self) -> Dict[str, Any]: @@ -116,22 +117,7 @@ def to_dict(self) -> Dict[str, Any]: outputs_to_state=self.outputs_to_state, ) - # Handle serialization of callable handlers based on the code in ComponentTool.to_dict - if self.outputs_to_state is not None: - 
serialized_outputs = {} - for key, config in self.outputs_to_state.items(): - serialized_config = config.copy() - if "handler" in config: - serialized_config["handler"] = serialize_callable(config["handler"]) - serialized_outputs[key] = serialized_config - serialized["init_parameters"]["outputs_to_state"] = serialized_outputs - - if self.outputs_to_string is not None and self.outputs_to_string.get("handler") is not None: - serialized["init_parameters"]["outputs_to_string"] = { - **self.outputs_to_string, - "handler": serialize_callable(self.outputs_to_string["handler"]), - } - + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) return serialized @classmethod @@ -145,23 +131,5 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": Deserialized tool. """ deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) - - # Handle deserialization of callable handlers based on the code in ComponentTool.from_dict - if "outputs_to_state" in data["init_parameters"] and data["init_parameters"]["outputs_to_state"]: - deserialized_outputs = {} - for key, config in data["init_parameters"]["outputs_to_state"].items(): - deserialized_config = config.copy() - if "handler" in config: - deserialized_config["handler"] = deserialize_callable(config["handler"]) - deserialized_outputs[key] = deserialized_config - data["init_parameters"]["outputs_to_state"] = deserialized_outputs - - if ( - data["init_parameters"].get("outputs_to_string") is not None - and data["init_parameters"]["outputs_to_string"].get("handler") is not None - ): - data["init_parameters"]["outputs_to_string"]["handler"] = deserialize_callable( - data["init_parameters"]["outputs_to_string"]["handler"] - ) - + deserialize_handlers(data) return default_from_dict(cls, data) diff --git a/integrations/github/src/haystack_integrations/tools/github/utils.py b/integrations/github/src/haystack_integrations/tools/github/utils.py index 356654f9c2..8ff36295a4 100644 --- 
a/integrations/github/src/haystack_integrations/tools/github/utils.py +++ b/integrations/github/src/haystack_integrations/tools/github/utils.py @@ -2,14 +2,20 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import List +from typing import Any, Callable, Dict, List, Union from haystack import Document +from haystack.utils.callable_serialization import deserialize_callable, serialize_callable -def message_handler(documents: List[Document]) -> str: +def message_handler(documents: List[Document], max_length: int = 150_000) -> str: """ Handles the tool output before conversion to ChatMessage. + + :param documents: List of Document objects + :param max_length: Maximum number of characters of the result string + :returns: + String representation of the documents. """ result_str = "" for document in documents: @@ -19,7 +25,59 @@ def message_handler(documents: List[Document]) -> str: result_str += f"File Content for {document.meta['path']}\n\n" result_str += document.content - if len(result_str) > 150_000: - result_str = result_str[:150_000] + "...(large file can't be fully displayed)" + if len(result_str) > max_length: + result_str = result_str[:max_length] + "...(large file can't be fully displayed)" return result_str + + +def serialize_handlers( + serialized: Dict[str, Any], + outputs_to_state: Dict[str, Dict[str, Union[str, Callable]]], + outputs_to_string: Dict[str, Union[str, Callable[[Any], str]]], +) -> None: + """ + Serializes callable handlers in outputs_to_state and outputs_to_string. 
+ + :param serialized: The dictionary to update with serialized handlers + :param outputs_to_state: Dictionary containing state output configurations + :param outputs_to_string: Dictionary containing string output configuration + """ + if outputs_to_state is not None: + serialized_outputs = {} + for key, config in outputs_to_state.items(): + serialized_config = config.copy() + if "handler" in config: + serialized_config["handler"] = serialize_callable(config["handler"]) + serialized_outputs[key] = serialized_config + serialized["init_parameters"]["outputs_to_state"] = serialized_outputs + + if outputs_to_string is not None and outputs_to_string.get("handler") is not None: + serialized["init_parameters"]["outputs_to_string"] = { + **outputs_to_string, + "handler": serialize_callable(outputs_to_string["handler"]), + } + + +def deserialize_handlers(data: Dict[str, Any]) -> None: + """ + Deserializes callable handlers in outputs_to_state and outputs_to_string. + + :param data: The dictionary containing serialized handlers to deserialize + """ + if "outputs_to_state" in data["init_parameters"] and data["init_parameters"]["outputs_to_state"]: + deserialized_outputs = {} + for key, config in data["init_parameters"]["outputs_to_state"].items(): + deserialized_config = config.copy() + if "handler" in config: + deserialized_config["handler"] = deserialize_callable(config["handler"]) + deserialized_outputs[key] = deserialized_config + data["init_parameters"]["outputs_to_state"] = deserialized_outputs + + if ( + data["init_parameters"].get("outputs_to_string") is not None + and data["init_parameters"]["outputs_to_string"].get("handler") is not None + ): + data["init_parameters"]["outputs_to_string"]["handler"] = deserialize_callable( + data["init_parameters"]["outputs_to_string"]["handler"] + ) From 9860266f35e8f0e6319a0ccf05a654d5cd0d7eb7 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 12:46:17 +0200 Subject: [PATCH 44/51] replace init_parameters with data 
for serde in FileEditor, RepoViewer --- .../tools/github/file_editor_tool.py | 35 +++++---- .../tools/github/repo_viewer_tool.py | 37 +++++----- .../tools/github/utils.py | 30 +++----- .../github/tests/test_file_editor_tool.py | 71 +++++++++++-------- .../github/tests/test_repo_viewer_tool.py | 70 +++++++++--------- 5 files changed, 123 insertions(+), 120 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index 40b9184fb0..2c7ed45110 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Callable, Dict, Optional, Union -from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace @@ -93,22 +92,21 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. 
""" - serialized = default_to_dict( - self, - name=self.name, - description=self.description, - parameters=self.parameters, - github_token=self.github_token.to_dict(), - repo=self.repo, - branch=self.branch, - raise_on_failure=self.raise_on_failure, - outputs_to_string=self.outputs_to_string, - inputs_from_state=self.inputs_from_state, - outputs_to_state=self.outputs_to_state, - ) + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "repo": self.repo, + "branch": self.branch, + "raise_on_failure": self.raise_on_failure, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) - return serialized + return {"type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": @@ -120,6 +118,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": :returns: Deserialized tool. 
""" - deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) - deserialize_handlers(data) - return default_from_dict(cls, data) + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index fa7c93ae9d..d53193bca6 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Callable, Dict, Optional, Union -from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace @@ -102,23 +101,22 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. 
""" - serialized = default_to_dict( - self, - name=self.name, - description=self.description, - parameters=self.parameters, - github_token=self.github_token.to_dict() if self.github_token else None, - repo=self.repo, - branch=self.branch, - raise_on_failure=self.raise_on_failure, - max_file_size=self.max_file_size, - outputs_to_string=self.outputs_to_string, - inputs_from_state=self.inputs_from_state, - outputs_to_state=self.outputs_to_state, - ) + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "repo": self.repo, + "branch": self.branch, + "raise_on_failure": self.raise_on_failure, + "max_file_size": self.max_file_size, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) - return serialized + return {"type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": @@ -130,6 +128,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": :returns: Deserialized tool. 
""" - deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) - deserialize_handlers(data) - return default_from_dict(cls, data) + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/src/haystack_integrations/tools/github/utils.py b/integrations/github/src/haystack_integrations/tools/github/utils.py index 8ff36295a4..0d4c743302 100644 --- a/integrations/github/src/haystack_integrations/tools/github/utils.py +++ b/integrations/github/src/haystack_integrations/tools/github/utils.py @@ -50,13 +50,12 @@ def serialize_handlers( if "handler" in config: serialized_config["handler"] = serialize_callable(config["handler"]) serialized_outputs[key] = serialized_config - serialized["init_parameters"]["outputs_to_state"] = serialized_outputs + serialized["outputs_to_state"] = serialized_outputs - if outputs_to_string is not None and outputs_to_string.get("handler") is not None: - serialized["init_parameters"]["outputs_to_string"] = { - **outputs_to_string, - "handler": serialize_callable(outputs_to_string["handler"]), - } + if outputs_to_string is not None and "handler" in outputs_to_string: + serialized_string = outputs_to_string.copy() + serialized_string["handler"] = serialize_callable(outputs_to_string["handler"]) + serialized["outputs_to_string"] = serialized_string def deserialize_handlers(data: Dict[str, Any]) -> None: @@ -65,19 +64,10 @@ def deserialize_handlers(data: Dict[str, Any]) -> None: :param data: The dictionary containing serialized handlers to deserialize """ - if "outputs_to_state" in data["init_parameters"] and data["init_parameters"]["outputs_to_state"]: - deserialized_outputs = {} - for key, config in data["init_parameters"]["outputs_to_state"].items(): - deserialized_config = config.copy() + if data.get("outputs_to_state"): + for config in data["outputs_to_state"].values(): if "handler" in config: - 
deserialized_config["handler"] = deserialize_callable(config["handler"]) - deserialized_outputs[key] = deserialized_config - data["init_parameters"]["outputs_to_state"] = deserialized_outputs + config["handler"] = deserialize_callable(config["handler"]) - if ( - data["init_parameters"].get("outputs_to_string") is not None - and data["init_parameters"]["outputs_to_string"].get("handler") is not None - ): - data["init_parameters"]["outputs_to_string"]["handler"] = deserialize_callable( - data["init_parameters"]["outputs_to_string"]["handler"] - ) + if "outputs_to_string" in data and data["outputs_to_string"] and "handler" in data["outputs_to_string"]: + data["outputs_to_string"]["handler"] = deserialize_callable(data["outputs_to_string"]["handler"]) diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py index 26886115e8..c1b8d6aefe 100644 --- a/integrations/github/tests/test_file_editor_tool.py +++ b/integrations/github/tests/test_file_editor_tool.py @@ -8,15 +8,7 @@ from haystack_integrations.prompts.github.file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA from haystack_integrations.tools.github.file_editor_tool import GitHubFileEditorTool - - -def custom_handler(value): - """A test handler function for serialization tests.""" - return f"Processed: {value}" - - -# Make the handler available at module level -__all__ = ["custom_handler"] +from haystack_integrations.tools.github.utils import message_handler class TestGitHubFileEditorTool: @@ -26,12 +18,19 @@ def test_init(self, monkeypatch): assert tool.name == "file_editor" assert tool.description == FILE_EDITOR_PROMPT assert tool.parameters == FILE_EDITOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None def 
test_from_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool_dict = { "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", - "init_parameters": { + "data": { "name": "file_editor", "description": FILE_EDITOR_PROMPT, "parameters": FILE_EDITOR_SCHEMA, @@ -55,35 +54,41 @@ def test_to_dict(self, monkeypatch): tool = GitHubFileEditorTool() tool_dict = tool.to_dict() assert tool_dict["type"] == "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool" - assert tool_dict["init_parameters"]["name"] == "file_editor" - assert tool_dict["init_parameters"]["description"] == FILE_EDITOR_PROMPT - assert tool_dict["init_parameters"]["parameters"] == FILE_EDITOR_SCHEMA - assert tool_dict["init_parameters"]["github_token"] == { + assert tool_dict["data"]["name"] == "file_editor" + assert tool_dict["data"]["description"] == FILE_EDITOR_PROMPT + assert tool_dict["data"]["parameters"] == FILE_EDITOR_SCHEMA + assert tool_dict["data"]["github_token"] == { "env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var", } - assert tool_dict["init_parameters"]["repo"] is None - assert tool_dict["init_parameters"]["branch"] == "main" - assert tool_dict["init_parameters"]["raise_on_failure"] + assert tool_dict["data"]["repo"] is None + assert tool_dict["data"]["branch"] == "main" + assert tool_dict["data"]["raise_on_failure"] + assert tool_dict["data"]["outputs_to_string"] is None + assert tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None def test_to_dict_with_extra_params(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool = GitHubFileEditorTool( - outputs_to_string={"source": "result", "handler": custom_handler}, + outputs_to_string={"source": "result", "handler": message_handler}, inputs_from_state={"repo_state": "repo"}, - outputs_to_state={"file_content": {"source": "content", "handler": custom_handler}}, + 
outputs_to_state={"file_content": {"source": "content", "handler": message_handler}}, ) tool_dict = tool.to_dict() - assert tool_dict["init_parameters"]["outputs_to_string"] == { + assert tool_dict["data"]["outputs_to_string"] == { "source": "result", - "handler": "tests.test_file_editor_tool.custom_handler", + "handler": "haystack_integrations.tools.github.utils.message_handler", } - assert tool_dict["init_parameters"]["inputs_from_state"] == {"repo_state": "repo"} - assert tool_dict["init_parameters"]["outputs_to_state"] == { - "file_content": {"source": "content", "handler": "tests.test_file_editor_tool.custom_handler"} + assert tool_dict["data"]["inputs_from_state"] == {"repo_state": "repo"} + assert tool_dict["data"]["outputs_to_state"] == { + "file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, } def test_from_dict_with_extra_params(self, monkeypatch): @@ -91,7 +96,7 @@ def test_from_dict_with_extra_params(self, monkeypatch): tool_dict = { "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", - "init_parameters": { + "data": { "name": "file_editor", "description": FILE_EDITOR_PROMPT, "parameters": FILE_EDITOR_SCHEMA, @@ -99,20 +104,26 @@ def test_from_dict_with_extra_params(self, monkeypatch): "repo": None, "branch": "main", "raise_on_failure": True, - "outputs_to_string": {"source": "result", "handler": "tests.test_file_editor_tool.custom_handler"}, + "outputs_to_string": { + "source": "result", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, "inputs_from_state": {"repo_state": "repo"}, "outputs_to_state": { - "file_content": {"source": "content", "handler": "tests.test_file_editor_tool.custom_handler"} + "file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, }, }, } tool = GitHubFileEditorTool.from_dict(tool_dict) assert tool.outputs_to_string["source"] == "result" - 
assert tool.outputs_to_string["handler"] == custom_handler + assert tool.outputs_to_string["handler"] == message_handler assert tool.inputs_from_state == {"repo_state": "repo"} assert tool.outputs_to_state["file_content"]["source"] == "content" - assert tool.outputs_to_state["file_content"]["handler"] == custom_handler + assert tool.outputs_to_state["file_content"]["handler"] == message_handler def test_pipeline_serialization(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") @@ -156,7 +167,7 @@ def test_pipeline_serialization(self, monkeypatch): "tools": [ { "type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", - "init_parameters": { + "data": { "name": "file_editor", "description": FILE_EDITOR_PROMPT, "parameters": FILE_EDITOR_SCHEMA, diff --git a/integrations/github/tests/test_repo_viewer_tool.py b/integrations/github/tests/test_repo_viewer_tool.py index 05bd9108d9..d83a7a68d3 100644 --- a/integrations/github/tests/test_repo_viewer_tool.py +++ b/integrations/github/tests/test_repo_viewer_tool.py @@ -6,15 +6,6 @@ from haystack_integrations.tools.github.utils import message_handler -def custom_handler(value): - """A test handler function for serialization tests.""" - return f"Processed: {value}" - - -# Make the handler available at module level -__all__ = ["custom_handler"] - - class TestGitHubRepoViewerTool: def test_init(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") @@ -23,6 +14,10 @@ def test_init(self, monkeypatch): assert tool.description == REPO_VIEWER_PROMPT assert tool.parameters == REPO_VIEWER_SCHEMA assert tool.max_file_size == 1_000_000 + assert tool.github_token is None + assert tool.repo is None + assert tool.branch == "main" + assert tool.raise_on_failure assert tool.outputs_to_string == {"source": "documents", "handler": message_handler} assert tool.inputs_from_state == {} assert tool.outputs_to_state == {"documents": {"source": "documents"}} @@ -31,7 +26,7 @@ def 
test_from_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool_dict = { "type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", - "init_parameters": { + "data": { "name": "repo_viewer", "description": REPO_VIEWER_PROMPT, "parameters": REPO_VIEWER_SCHEMA, @@ -67,38 +62,41 @@ def test_to_dict(self, monkeypatch): tool = GitHubRepoViewerTool() tool_dict = tool.to_dict() assert tool_dict["type"] == "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool" - assert tool_dict["init_parameters"]["name"] == "repo_viewer" - assert tool_dict["init_parameters"]["description"] == REPO_VIEWER_PROMPT - assert tool_dict["init_parameters"]["parameters"] == REPO_VIEWER_SCHEMA - assert tool_dict["init_parameters"]["github_token"] is None - assert tool_dict["init_parameters"]["repo"] is None - assert tool_dict["init_parameters"]["branch"] == "main" - assert tool_dict["init_parameters"]["raise_on_failure"] - assert tool_dict["init_parameters"]["max_file_size"] == 1_000_000 - assert tool_dict["init_parameters"]["outputs_to_string"] == { + assert tool_dict["data"]["name"] == "repo_viewer" + assert tool_dict["data"]["description"] == REPO_VIEWER_PROMPT + assert tool_dict["data"]["parameters"] == REPO_VIEWER_SCHEMA + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["repo"] is None + assert tool_dict["data"]["branch"] == "main" + assert tool_dict["data"]["raise_on_failure"] + assert tool_dict["data"]["max_file_size"] == 1_000_000 + assert tool_dict["data"]["outputs_to_string"] == { "source": "documents", "handler": "haystack_integrations.tools.github.utils.message_handler", } - assert tool_dict["init_parameters"]["inputs_from_state"] == {} - assert tool_dict["init_parameters"]["outputs_to_state"] == {"documents": {"source": "documents"}} + assert tool_dict["data"]["inputs_from_state"] == {} + assert tool_dict["data"]["outputs_to_state"] == {"documents": {"source": "documents"}} def 
test_to_dict_with_extra_params(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool = GitHubRepoViewerTool( - outputs_to_string={"source": "result", "handler": custom_handler}, + outputs_to_string={"source": "result", "handler": message_handler}, inputs_from_state={"repo_state": "repo"}, - outputs_to_state={"file_content": {"source": "content", "handler": custom_handler}}, + outputs_to_state={"file_content": {"source": "content", "handler": message_handler}}, ) tool_dict = tool.to_dict() - assert tool_dict["init_parameters"]["outputs_to_string"] == { + assert tool_dict["data"]["outputs_to_string"] == { "source": "result", - "handler": "tests.test_repo_viewer_tool.custom_handler", + "handler": "haystack_integrations.tools.github.utils.message_handler", } - assert tool_dict["init_parameters"]["inputs_from_state"] == {"repo_state": "repo"} - assert tool_dict["init_parameters"]["outputs_to_state"] == { - "file_content": {"source": "content", "handler": "tests.test_repo_viewer_tool.custom_handler"} + assert tool_dict["data"]["inputs_from_state"] == {"repo_state": "repo"} + assert tool_dict["data"]["outputs_to_state"] == { + "file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, } def test_from_dict_with_extra_params(self, monkeypatch): @@ -106,7 +104,7 @@ def test_from_dict_with_extra_params(self, monkeypatch): tool_dict = { "type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", - "init_parameters": { + "data": { "name": "repo_viewer", "description": REPO_VIEWER_PROMPT, "parameters": REPO_VIEWER_SCHEMA, @@ -115,17 +113,23 @@ def test_from_dict_with_extra_params(self, monkeypatch): "branch": "main", "raise_on_failure": True, "max_file_size": 1_000_000, - "outputs_to_string": {"source": "result", "handler": "tests.test_repo_viewer_tool.custom_handler"}, + "outputs_to_string": { + "source": "result", + "handler": 
"haystack_integrations.tools.github.utils.message_handler", + }, "inputs_from_state": {"repo_state": "repo"}, "outputs_to_state": { - "file_content": {"source": "content", "handler": "tests.test_repo_viewer_tool.custom_handler"} + "file_content": { + "source": "content", + "handler": "haystack_integrations.tools.github.utils.message_handler", + }, }, }, } tool = GitHubRepoViewerTool.from_dict(tool_dict) assert tool.outputs_to_string["source"] == "result" - assert tool.outputs_to_string["handler"] == custom_handler + assert tool.outputs_to_string["handler"] == message_handler assert tool.inputs_from_state == {"repo_state": "repo"} assert tool.outputs_to_state["file_content"]["source"] == "content" - assert tool.outputs_to_state["file_content"]["handler"] == custom_handler + assert tool.outputs_to_state["file_content"]["handler"] == message_handler From 0c52ff43452ca703179ae90f146b82dccf7c70ec Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 13:03:34 +0200 Subject: [PATCH 45/51] add outputs_to_state to GitHubIssueCommenterTool; replace init_parameters with data --- .../tools/github/issue_commenter_tool.py | 72 ++++++++++++--- .../github/tests/test_issue_commenter_tool.py | 92 +++++++++++++++++-- 2 files changed, 144 insertions(+), 20 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py index 58ac9dc47b..65248170b7 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py @@ -1,19 +1,46 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Callable, Dict, Optional, Union -from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from 
haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.issue_commenter import GitHubIssueCommenter from haystack_integrations.prompts.github.issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers class GitHubIssueCommenterTool(ComponentTool): """ A tool for commenting on GitHub issues. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } """ def __init__( @@ -25,6 +52,9 @@ def __init__( github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, retry_attempts: int = 2, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): self.name = name self.description = description @@ -32,6 +62,9 @@ def __init__( self.github_token = github_token self.raise_on_failure = raise_on_failure self.retry_attempts = retry_attempts + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state issue_commenter = GitHubIssueCommenter( github_token=github_token, @@ -43,6 +76,9 @@ def __init__( name=name, description=description, parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, ) def to_dict(self) -> Dict[str, Any]: @@ -52,15 +88,23 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. 
""" - return default_to_dict( - self, - name=self.name, - description=self.description, - parameters=self.parameters, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure, - retry_attempts=self.retry_attempts, - ) + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "raise_on_failure": self.raise_on_failure, + "retry_attempts": self.retry_attempts, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return { + "type": "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool", + "data": serialized, + } @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenterTool": @@ -72,5 +116,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenterTool": :returns: Deserialized tool. 
""" - deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) - return default_from_dict(cls, data) + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/tests/test_issue_commenter_tool.py b/integrations/github/tests/test_issue_commenter_tool.py index 32e4eaf0c8..2a4c19a4f7 100644 --- a/integrations/github/tests/test_issue_commenter_tool.py +++ b/integrations/github/tests/test_issue_commenter_tool.py @@ -5,6 +5,7 @@ from haystack_integrations.prompts.github.issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA from haystack_integrations.tools.github.issue_commenter_tool import GitHubIssueCommenterTool +from haystack_integrations.tools.github.utils import message_handler class TestGitHubIssueCommenterTool: @@ -15,18 +16,24 @@ def test_init(self, monkeypatch): assert tool.description == ISSUE_COMMENTER_PROMPT assert tool.parameters == ISSUE_COMMENTER_SCHEMA assert tool.retry_attempts == 2 + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None def test_from_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool_dict = { "type": "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool", - "init_parameters": { + "data": { "name": "issue_commenter", "description": ISSUE_COMMENTER_PROMPT, "parameters": ISSUE_COMMENTER_SCHEMA, "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, "raise_on_failure": True, "retry_attempts": 2, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, }, } tool = GitHubIssueCommenterTool.from_dict(tool_dict) @@ -36,19 +43,90 @@ def test_from_dict(self, monkeypatch): assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") assert tool.raise_on_failure assert tool.retry_attempts == 2 + assert 
tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None def test_to_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool = GitHubIssueCommenterTool() tool_dict = tool.to_dict() assert tool_dict["type"] == "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool" - assert tool_dict["init_parameters"]["name"] == "issue_commenter" - assert tool_dict["init_parameters"]["description"] == ISSUE_COMMENTER_PROMPT - assert tool_dict["init_parameters"]["parameters"] == ISSUE_COMMENTER_SCHEMA - assert tool_dict["init_parameters"]["github_token"] == { + assert tool_dict["data"]["name"] == "issue_commenter" + assert tool_dict["data"]["description"] == ISSUE_COMMENTER_PROMPT + assert tool_dict["data"]["parameters"] == ISSUE_COMMENTER_SCHEMA + assert tool_dict["data"]["github_token"] == { "env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var", } - assert tool_dict["init_parameters"]["raise_on_failure"] - assert tool_dict["init_parameters"]["retry_attempts"] == 2 + assert tool_dict["data"]["raise_on_failure"] + assert tool_dict["data"]["retry_attempts"] == 2 + assert tool_dict["data"]["outputs_to_string"] is None + assert tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueCommenterTool( + name="test_issue_commenter", + description="Test description", + parameters={"type": "object", "properties": {}}, + github_token=None, + raise_on_failure=False, + retry_attempts=3, + outputs_to_string={"handler": message_handler}, + inputs_from_state={"repository": "repo"}, + outputs_to_state={"documents": {"source": "docs", "handler": message_handler}}, + ) + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool" + assert 
tool_dict["data"]["name"] == "test_issue_commenter" + assert tool_dict["data"]["description"] == "Test description" + assert tool_dict["data"]["parameters"] == {"type": "object", "properties": {}} + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["raise_on_failure"] is False + assert tool_dict["data"]["retry_attempts"] == 3 + assert ( + tool_dict["data"]["outputs_to_string"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + assert tool_dict["data"]["inputs_from_state"] == {"repository": "repo"} + assert tool_dict["data"]["outputs_to_state"]["documents"]["source"] == "docs" + assert ( + tool_dict["data"]["outputs_to_state"]["documents"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool", + "data": { + "name": "test_issue_commenter", + "description": "Test description", + "parameters": {"type": "object", "properties": {}}, + "github_token": None, + "raise_on_failure": False, + "retry_attempts": 3, + "outputs_to_string": {"handler": "haystack_integrations.tools.github.utils.message_handler"}, + "inputs_from_state": {"repository": "repo"}, + "outputs_to_state": { + "documents": { + "source": "docs", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + }, + }, + } + tool = GitHubIssueCommenterTool.from_dict(tool_dict) + assert tool.name == "test_issue_commenter" + assert tool.description == "Test description" + assert tool.parameters == {"type": "object", "properties": {}} + assert tool.github_token is None + assert tool.raise_on_failure is False + assert tool.retry_attempts == 3 + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repository": "repo"} + assert 
tool.outputs_to_state["documents"]["source"] == "docs" + assert tool.outputs_to_state["documents"]["handler"] == message_handler From 3160373c9eccbc5407d35e6ff43041410167bbdc Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 13:16:34 +0200 Subject: [PATCH 46/51] add outputs_to_state to GitHubIssueViewerTool; replace init_parameters with data --- .../tools/github/issue_viewer_tool.py | 72 +++++++++--- .../github/tests/test_issue_viewer_tool.py | 106 +++++++++++++++--- 2 files changed, 152 insertions(+), 26 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py index 738399031a..5cbd99956b 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py @@ -1,19 +1,46 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Callable, Dict, Optional, Union -from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.issue_viewer import GitHubIssueViewer from haystack_integrations.prompts.github.issue_viewer_prompt import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers class GitHubIssueViewerTool(ComponentTool): """ A tool for viewing GitHub issues. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. 
+ :param github_token: Optional GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } """ def __init__( @@ -25,6 +52,9 @@ def __init__( github_token: Optional[Secret] = None, raise_on_failure: bool = True, retry_attempts: int = 2, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): self.name = name self.description = description @@ -32,6 +62,9 @@ def __init__( self.github_token = github_token self.raise_on_failure = raise_on_failure self.retry_attempts = retry_attempts + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state issue_viewer = GitHubIssueViewer( github_token=github_token, @@ -43,6 +76,9 @@ def __init__( name=name, description=description, parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, ) def to_dict(self) -> Dict[str, Any]: @@ -52,15 +88,23 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. 
""" - return default_to_dict( - self, - name=self.name, - description=self.description, - parameters=self.parameters, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure, - retry_attempts=self.retry_attempts, - ) + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "raise_on_failure": self.raise_on_failure, + "retry_attempts": self.retry_attempts, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return { + "type": "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool", + "data": serialized, + } @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewerTool": @@ -72,5 +116,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewerTool": :returns: Deserialized tool. 
""" - deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) - return default_from_dict(cls, data) + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/tests/test_issue_viewer_tool.py b/integrations/github/tests/test_issue_viewer_tool.py index ba343457c8..8edb7b8a15 100644 --- a/integrations/github/tests/test_issue_viewer_tool.py +++ b/integrations/github/tests/test_issue_viewer_tool.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from haystack_integrations.prompts.github.issue_viewer_prompt import ISSUE_VIEWER_PROMPT, ISSUE_VIEWER_SCHEMA from haystack_integrations.tools.github.issue_viewer_tool import GitHubIssueViewerTool +from haystack_integrations.tools.github.utils import message_handler class TestGitHubIssueViewerTool: @@ -12,37 +13,116 @@ def test_init(self, monkeypatch): assert tool.name == "issue_viewer" assert tool.description == ISSUE_VIEWER_PROMPT assert tool.parameters == ISSUE_VIEWER_SCHEMA + assert tool.github_token is None + assert tool.raise_on_failure is True assert tool.retry_attempts == 2 + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None def test_from_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool_dict = { "type": "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool", - "init_parameters": { - "name": "issue_viewer", - "description": ISSUE_VIEWER_PROMPT, - "parameters": ISSUE_VIEWER_SCHEMA, + "data": { + "name": "test_issue_viewer", + "description": "Test description", + "parameters": {"type": "object", "properties": {}}, "github_token": None, "raise_on_failure": True, "retry_attempts": 2, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, }, } tool = GitHubIssueViewerTool.from_dict(tool_dict) - assert tool.name == 
"issue_viewer" - assert tool.description == ISSUE_VIEWER_PROMPT - assert tool.parameters == ISSUE_VIEWER_SCHEMA + assert tool.name == "test_issue_viewer" + assert tool.description == "Test description" + assert tool.parameters == {"type": "object", "properties": {}} assert tool.github_token is None assert tool.raise_on_failure assert tool.retry_attempts == 2 + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None def test_to_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool = GitHubIssueViewerTool() tool_dict = tool.to_dict() assert tool_dict["type"] == "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool" - assert tool_dict["init_parameters"]["name"] == "issue_viewer" - assert tool_dict["init_parameters"]["description"] == ISSUE_VIEWER_PROMPT - assert tool_dict["init_parameters"]["parameters"] == ISSUE_VIEWER_SCHEMA - assert tool_dict["init_parameters"]["github_token"] is None - assert tool_dict["init_parameters"]["raise_on_failure"] - assert tool_dict["init_parameters"]["retry_attempts"] == 2 + assert tool_dict["data"]["name"] == "issue_viewer" + assert tool_dict["data"]["description"] == ISSUE_VIEWER_PROMPT + assert tool_dict["data"]["parameters"] == ISSUE_VIEWER_SCHEMA + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["raise_on_failure"] is True + assert tool_dict["data"]["retry_attempts"] == 2 + assert tool_dict["data"]["outputs_to_string"] is None + assert tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubIssueViewerTool( + name="test_issue_viewer", + description="Test description", + parameters={"type": "object", "properties": {}}, + github_token=None, + raise_on_failure=False, + retry_attempts=3, + outputs_to_string={"handler": 
message_handler}, + inputs_from_state={"repository": "repo"}, + outputs_to_state={"documents": {"source": "docs", "handler": message_handler}}, + ) + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool" + assert tool_dict["data"]["name"] == "test_issue_viewer" + assert tool_dict["data"]["description"] == "Test description" + assert tool_dict["data"]["parameters"] == {"type": "object", "properties": {}} + assert tool_dict["data"]["github_token"] is None + assert tool_dict["data"]["raise_on_failure"] is False + assert tool_dict["data"]["retry_attempts"] == 3 + assert ( + tool_dict["data"]["outputs_to_string"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + assert tool_dict["data"]["inputs_from_state"] == {"repository": "repo"} + assert tool_dict["data"]["outputs_to_state"]["documents"]["source"] == "docs" + assert ( + tool_dict["data"]["outputs_to_state"]["documents"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool", + "data": { + "name": "test_issue_viewer", + "description": "Test description", + "parameters": {"type": "object", "properties": {}}, + "github_token": None, + "raise_on_failure": False, + "retry_attempts": 3, + "outputs_to_string": {"handler": "haystack_integrations.tools.github.utils.message_handler"}, + "inputs_from_state": {"repository": "repo"}, + "outputs_to_state": { + "documents": { + "source": "docs", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + }, + }, + } + tool = GitHubIssueViewerTool.from_dict(tool_dict) + assert tool.name == "test_issue_viewer" + assert tool.description == "Test description" + assert tool.parameters == {"type": "object", "properties": {}} 
+ assert tool.github_token is None + assert tool.raise_on_failure is False + assert tool.retry_attempts == 3 + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repository": "repo"} + assert tool.outputs_to_state["documents"]["source"] == "docs" + assert tool.outputs_to_state["documents"]["handler"] == message_handler From f088c18243101ab20bb19c74e240e47bf5108e15 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Tue, 27 May 2025 13:24:14 +0200 Subject: [PATCH 47/51] add outputs_to_state to GitHubPRCreatorTool; replace init_parameters with data --- .../tools/github/pr_creator_tool.py | 65 ++++++++++--- .../github/tests/test_pr_creator_tool.py | 94 +++++++++++++++++-- 2 files changed, 140 insertions(+), 19 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py index de5a0685ae..5700ab5751 100644 --- a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -1,19 +1,45 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, Optional +from typing import Any, Callable, Dict, Optional, Union -from haystack import default_from_dict, default_to_dict from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace from haystack_integrations.components.connectors.github.pr_creator import GitHubPRCreator from haystack_integrations.prompts.github.pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA +from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers class GitHubPRCreatorTool(ComponentTool): """ A tool for creating pull requests in GitHub repositories. + + :param name: Optional name for the tool. + :param description: Optional description. 
+ :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } """ def __init__( @@ -24,12 +50,18 @@ def __init__( parameters: Optional[Dict[str, Any]] = PR_CREATOR_SCHEMA, github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"), raise_on_failure: bool = True, + outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None, + inputs_from_state: Optional[Dict[str, str]] = None, + outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): self.name = name self.description = description self.parameters = parameters self.github_token = github_token self.raise_on_failure = raise_on_failure + self.outputs_to_string = outputs_to_string + self.inputs_from_state = inputs_from_state + self.outputs_to_state = outputs_to_state pr_creator = GitHubPRCreator( github_token=github_token, @@ -40,6 +72,9 @@ def __init__( name=name, description=description, parameters=parameters, + outputs_to_string=outputs_to_string, + inputs_from_state=inputs_from_state, + outputs_to_state=outputs_to_state, ) def to_dict(self) -> Dict[str, Any]: @@ -49,14 +84,18 @@ def to_dict(self) -> Dict[str, Any]: :returns: Dictionary with serialized data. 
""" - return default_to_dict( - self, - name=self.name, - description=self.description, - parameters=self.parameters, - github_token=self.github_token.to_dict() if self.github_token else None, - raise_on_failure=self.raise_on_failure, - ) + serialized = { + "name": self.name, + "description": self.description, + "parameters": self.parameters, + "github_token": self.github_token.to_dict() if self.github_token else None, + "raise_on_failure": self.raise_on_failure, + "outputs_to_string": self.outputs_to_string, + "inputs_from_state": self.inputs_from_state, + "outputs_to_state": self.outputs_to_state, + } + serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) + return {"type": "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool", "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreatorTool": @@ -68,5 +107,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreatorTool": :returns: Deserialized tool. """ - deserialize_secrets_inplace(data["init_parameters"], keys=["github_token"]) - return default_from_dict(cls, data) + inner_data = data["data"] + deserialize_secrets_inplace(inner_data, keys=["github_token"]) + deserialize_handlers(inner_data) + return cls(**inner_data) diff --git a/integrations/github/tests/test_pr_creator_tool.py b/integrations/github/tests/test_pr_creator_tool.py index 35db78004e..fd3e2d4499 100644 --- a/integrations/github/tests/test_pr_creator_tool.py +++ b/integrations/github/tests/test_pr_creator_tool.py @@ -5,6 +5,7 @@ from haystack_integrations.prompts.github.pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA from haystack_integrations.tools.github.pr_creator_tool import GitHubPRCreatorTool +from haystack_integrations.tools.github.utils import message_handler class TestGitHubPRCreatorTool: @@ -14,17 +15,25 @@ def test_init(self, monkeypatch): assert tool.name == "pr_creator" assert tool.description == PR_CREATOR_PROMPT assert tool.parameters == 
PR_CREATOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure is True + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None def test_from_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool_dict = { "type": "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool", - "init_parameters": { + "data": { "name": "pr_creator", "description": PR_CREATOR_PROMPT, "parameters": PR_CREATOR_SCHEMA, "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, "raise_on_failure": True, + "outputs_to_string": None, + "inputs_from_state": None, + "outputs_to_state": None, }, } tool = GitHubPRCreatorTool.from_dict(tool_dict) @@ -32,19 +41,90 @@ def test_from_dict(self, monkeypatch): assert tool.description == PR_CREATOR_PROMPT assert tool.parameters == PR_CREATOR_SCHEMA assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") - assert tool.raise_on_failure + assert tool.raise_on_failure is True + assert tool.outputs_to_string is None + assert tool.inputs_from_state is None + assert tool.outputs_to_state is None def test_to_dict(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "test-token") tool = GitHubPRCreatorTool() tool_dict = tool.to_dict() assert tool_dict["type"] == "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool" - assert tool_dict["init_parameters"]["name"] == "pr_creator" - assert tool_dict["init_parameters"]["description"] == PR_CREATOR_PROMPT - assert tool_dict["init_parameters"]["parameters"] == PR_CREATOR_SCHEMA - assert tool_dict["init_parameters"]["github_token"] == { + assert tool_dict["data"]["name"] == "pr_creator" + assert tool_dict["data"]["description"] == PR_CREATOR_PROMPT + assert tool_dict["data"]["parameters"] == PR_CREATOR_SCHEMA + assert tool_dict["data"]["github_token"] == { "env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var", 
} - assert tool_dict["init_parameters"]["raise_on_failure"] + assert tool_dict["data"]["raise_on_failure"] is True + assert tool_dict["data"]["outputs_to_string"] is None + assert tool_dict["data"]["inputs_from_state"] is None + assert tool_dict["data"]["outputs_to_state"] is None + + def test_to_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool = GitHubPRCreatorTool( + name="pr_creator", + description="PR Creator Tool", + parameters=PR_CREATOR_SCHEMA, + github_token=Secret.from_env_var("GITHUB_TOKEN"), + raise_on_failure=False, + outputs_to_string={"handler": message_handler}, + inputs_from_state={"repository": "repo"}, + outputs_to_state={"documents": {"source": "docs", "handler": message_handler}}, + ) + tool_dict = tool.to_dict() + assert tool_dict["type"] == "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool" + assert tool_dict["data"]["name"] == "pr_creator" + assert tool_dict["data"]["description"] == "PR Creator Tool" + assert tool_dict["data"]["parameters"] == PR_CREATOR_SCHEMA + assert tool_dict["data"]["github_token"] == { + "env_vars": ["GITHUB_TOKEN"], + "strict": True, + "type": "env_var", + } + assert tool_dict["data"]["raise_on_failure"] is False + assert ( + tool_dict["data"]["outputs_to_string"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + assert tool_dict["data"]["inputs_from_state"] == {"repository": "repo"} + assert tool_dict["data"]["outputs_to_state"]["documents"]["source"] == "docs" + assert ( + tool_dict["data"]["outputs_to_state"]["documents"]["handler"] + == "haystack_integrations.tools.github.utils.message_handler" + ) + + def test_from_dict_with_extra_params(self, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "test-token") + tool_dict = { + "type": "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool", + "data": { + "name": "pr_creator", + "description": "PR Creator Tool", + "parameters": PR_CREATOR_SCHEMA, 
+ "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"}, + "raise_on_failure": False, + "outputs_to_string": {"handler": "haystack_integrations.tools.github.utils.message_handler"}, + "inputs_from_state": {"repository": "repo"}, + "outputs_to_state": { + "documents": { + "source": "docs", + "handler": "haystack_integrations.tools.github.utils.message_handler", + } + }, + }, + } + tool = GitHubPRCreatorTool.from_dict(tool_dict) + assert tool.name == "pr_creator" + assert tool.description == "PR Creator Tool" + assert tool.parameters == PR_CREATOR_SCHEMA + assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN") + assert tool.raise_on_failure is False + assert tool.outputs_to_string["handler"] == message_handler + assert tool.inputs_from_state == {"repository": "repo"} + assert tool.outputs_to_state["documents"]["source"] == "docs" + assert tool.outputs_to_state["documents"]["handler"] == message_handler From a822bc1dc8ffbfd08e24c063a2bf80f95eba4924 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 28 May 2025 11:07:00 +0200 Subject: [PATCH 48/51] move param docstrings to init methods --- .../tools/github/file_editor_tool.py | 59 ++++++++-------- .../tools/github/issue_commenter_tool.py | 57 ++++++++-------- .../tools/github/issue_viewer_tool.py | 57 ++++++++-------- .../tools/github/pr_creator_tool.py | 55 ++++++++------- .../tools/github/repo_viewer_tool.py | 67 ++++++++++--------- 5 files changed, 155 insertions(+), 140 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index 2c7ed45110..25d2693cfd 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -14,34 +14,6 @@ class GitHubFileEditorTool(ComponentTool): """ A tool for editing files in GitHub repositories. 
- - :param name: Optional name for the tool. - :param description: Optional description. - :param parameters: Optional JSON schema defining the parameters expected by the Tool. - :param github_token: GitHub personal access token for API authentication - :param repo: Default repository in owner/repo format - :param branch: Default branch to work with - :param raise_on_failure: If True, raises exceptions on API errors - :param outputs_to_string: - Optional dictionary defining how a tool outputs should be converted into a string. - If the source is provided only the specified output key is sent to the handler. - If the source is omitted the whole tool result is sent to the handler. - Example: { - "source": "docs", "handler": format_documents - } - :param inputs_from_state: - Optional dictionary mapping state keys to tool parameter names. - Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. - :param outputs_to_state: - Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. - If the source is provided only the specified output key is sent to the handler. - Example: { - "documents": {"source": "docs", "handler": custom_handler} - } - If the source is omitted the whole tool result is sent to the handler. - Example: { - "documents": {"handler": custom_handler} - } """ def __init__( @@ -58,6 +30,37 @@ def __init__( inputs_from_state: Optional[Dict[str, str]] = None, outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): + """ + Initialize the GitHub file editor tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. 
+ :param github_token: GitHub personal access token for API authentication + :param repo: Default repository in owner/repo format + :param branch: Default branch to work with + :param raise_on_failure: If True, raises exceptions on API errors + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. + Example: { + "documents": {"handler": custom_handler} + } + """ self.name = name self.description = description self.parameters = parameters diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py index 65248170b7..728a8af86c 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py @@ -14,33 +14,6 @@ class GitHubIssueCommenterTool(ComponentTool): """ A tool for commenting on GitHub issues. - - :param name: Optional name for the tool. - :param description: Optional description. - :param parameters: Optional JSON schema defining the parameters expected by the Tool. 
- :param github_token: GitHub personal access token for API authentication - :param raise_on_failure: If True, raises exceptions on API errors - :param retry_attempts: Number of retry attempts for failed requests - :param outputs_to_string: - Optional dictionary defining how a tool outputs should be converted into a string. - If the source is provided only the specified output key is sent to the handler. - If the source is omitted the whole tool result is sent to the handler. - Example: { - "source": "docs", "handler": format_documents - } - :param inputs_from_state: - Optional dictionary mapping state keys to tool parameter names. - Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. - :param outputs_to_state: - Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. - If the source is provided only the specified output key is sent to the handler. - Example: { - "documents": {"source": "docs", "handler": custom_handler} - } - If the source is omitted the whole tool result is sent to the handler. - Example: { - "documents": {"handler": custom_handler} - } """ def __init__( @@ -56,6 +29,36 @@ def __init__( inputs_from_state: Optional[Dict[str, str]] = None, outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): + """ + Initialize the GitHub issue commenter tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. 
+ If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. + Example: { + "documents": {"handler": custom_handler} + } + """ self.name = name self.description = description self.parameters = parameters diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py index 5cbd99956b..547aebe08f 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py @@ -14,33 +14,6 @@ class GitHubIssueViewerTool(ComponentTool): """ A tool for viewing GitHub issues. - - :param name: Optional name for the tool. - :param description: Optional description. - :param parameters: Optional JSON schema defining the parameters expected by the Tool. - :param github_token: Optional GitHub personal access token for API authentication - :param raise_on_failure: If True, raises exceptions on API errors - :param retry_attempts: Number of retry attempts for failed requests - :param outputs_to_string: - Optional dictionary defining how a tool outputs should be converted into a string. - If the source is provided only the specified output key is sent to the handler. - If the source is omitted the whole tool result is sent to the handler. 
- Example: { - "source": "docs", "handler": format_documents - } - :param inputs_from_state: - Optional dictionary mapping state keys to tool parameter names. - Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. - :param outputs_to_state: - Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. - If the source is provided only the specified output key is sent to the handler. - Example: { - "documents": {"source": "docs", "handler": custom_handler} - } - If the source is omitted the whole tool result is sent to the handler. - Example: { - "documents": {"handler": custom_handler} - } """ def __init__( @@ -56,6 +29,36 @@ def __init__( inputs_from_state: Optional[Dict[str, str]] = None, outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): + """ + Initialize the GitHub issue viewer tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: Optional GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param retry_attempts: Number of retry attempts for failed requests + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. 
+ If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. + Example: { + "documents": {"handler": custom_handler} + } + """ self.name = name self.description = description self.parameters = parameters diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py index 5700ab5751..08966d005a 100644 --- a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -14,32 +14,6 @@ class GitHubPRCreatorTool(ComponentTool): """ A tool for creating pull requests in GitHub repositories. - - :param name: Optional name for the tool. - :param description: Optional description. - :param parameters: Optional JSON schema defining the parameters expected by the Tool. - :param github_token: GitHub personal access token for API authentication - :param raise_on_failure: If True, raises exceptions on API errors - :param outputs_to_string: - Optional dictionary defining how a tool outputs should be converted into a string. - If the source is provided only the specified output key is sent to the handler. - If the source is omitted the whole tool result is sent to the handler. - Example: { - "source": "docs", "handler": format_documents - } - :param inputs_from_state: - Optional dictionary mapping state keys to tool parameter names. - Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. - :param outputs_to_state: - Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. - If the source is provided only the specified output key is sent to the handler. 
- Example: { - "documents": {"source": "docs", "handler": custom_handler} - } - If the source is omitted the whole tool result is sent to the handler. - Example: { - "documents": {"handler": custom_handler} - } """ def __init__( @@ -54,6 +28,35 @@ def __init__( inputs_from_state: Optional[Dict[str, str]] = None, outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): + """ + Initialize the GitHub PR creator tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: GitHub personal access token for API authentication + :param raise_on_failure: If True, raises exceptions on API errors + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + If the source is provided only the specified output key is sent to the handler. + Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. 
+ Example: { + "documents": {"handler": custom_handler} + } + """ self.name = name self.description = description self.parameters = parameters diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index d53193bca6..88bf89623a 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -14,38 +14,6 @@ class GitHubRepoViewerTool(ComponentTool): """ A tool for viewing files and directories in GitHub repositories. - - :param name: Optional name for the tool. - :param description: Optional description. - :param parameters: Optional JSON schema defining the parameters expected by the Tool. - :param github_token: GitHub personal access token for API authentication - :param repo: Default repository in owner/repo format - :param branch: Default branch to work with - :param raise_on_failure: If True, raises exceptions on API errors - :param max_file_size: Maximum file size in bytes to read - :param outputs_to_string: - Optional dictionary defining how a tool outputs should be converted into a string. - By default, truncates the document.content of the viewed files to 150,000 characters each. - If the source is provided only the specified output key is sent to the handler. - If the source is omitted the whole tool result is sent to the handler. - Example: { - "source": "docs", "handler": format_documents - } - :param inputs_from_state: - Optional dictionary mapping state keys to tool parameter names. - By default, the tool does not use any inputs from state. - Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. - :param outputs_to_state: - Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. - By default, outputs the viewed files as documents to the state. 
- If the source is provided only the specified output key is sent to the handler. - Example: { - "documents": {"source": "docs", "handler": custom_handler} - } - If the source is omitted the whole tool result is sent to the handler. - Example: { - "documents": {"handler": custom_handler} - } """ def __init__( @@ -63,6 +31,41 @@ def __init__( inputs_from_state: Optional[Dict[str, str]] = None, outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None, ): + """ + Initialize the GitHub repository viewer tool. + + :param name: Optional name for the tool. + :param description: Optional description. + :param parameters: Optional JSON schema defining the parameters expected by the Tool. + :param github_token: Optional GitHub personal access token for API authentication + :param repo: Default repository in owner/repo format + :param branch: Default branch to work with + :param raise_on_failure: If True, raises exceptions on API errors + :param max_file_size: Maximum file size in bytes to read + :param outputs_to_string: + Optional dictionary defining how a tool outputs should be converted into a string. + By default, truncates the document.content of the viewed files to 150,000 characters each. + If the source is provided only the specified output key is sent to the handler. + If the source is omitted the whole tool result is sent to the handler. + Example: { + "source": "docs", "handler": format_documents + } + :param inputs_from_state: + Optional dictionary mapping state keys to tool parameter names. + By default, the tool does not use any inputs from state. + Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter. + :param outputs_to_state: + Optional dictionary defining how tool outputs map to keys within state as well as optional handlers. + By default, outputs the viewed files as documents to the state. + If the source is provided only the specified output key is sent to the handler. 
+ Example: { + "documents": {"source": "docs", "handler": custom_handler} + } + If the source is omitted the whole tool result is sent to the handler. + Example: { + "documents": {"handler": custom_handler} + } + """ self.name = name self.description = description self.parameters = parameters From f911175dcf743215b9803853e0827d8c5f289f43 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 28 May 2025 11:11:06 +0200 Subject: [PATCH 49/51] use generate_qualified_class_name instead of hardcoded name --- .../haystack_integrations/tools/github/file_editor_tool.py | 3 ++- .../tools/github/issue_commenter_tool.py | 6 ++---- .../haystack_integrations/tools/github/issue_viewer_tool.py | 6 ++---- .../haystack_integrations/tools/github/pr_creator_tool.py | 3 ++- .../haystack_integrations/tools/github/repo_viewer_tool.py | 3 ++- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py index 25d2693cfd..ac1a215231 100644 --- a/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/file_editor_tool.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Callable, Dict, Optional, Union +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace @@ -109,7 +110,7 @@ def to_dict(self) -> Dict[str, Any]: } serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) - return {"type": "haystack_integrations.tools.github.file_editor_tool.GitHubFileEditorTool", "data": serialized} + return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubFileEditorTool": diff --git 
a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py index 728a8af86c..43d37740f2 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_commenter_tool.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Callable, Dict, Optional, Union +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace @@ -104,10 +105,7 @@ def to_dict(self) -> Dict[str, Any]: } serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) - return { - "type": "haystack_integrations.tools.github.issue_commenter_tool.GitHubIssueCommenterTool", - "data": serialized, - } + return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueCommenterTool": diff --git a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py index 547aebe08f..48dc28f0c3 100644 --- a/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/issue_viewer_tool.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Callable, Dict, Optional, Union +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace @@ -104,10 +105,7 @@ def to_dict(self) -> Dict[str, Any]: } serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) - return { - "type": 
"haystack_integrations.tools.github.issue_viewer_tool.GitHubIssueViewerTool", - "data": serialized, - } + return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubIssueViewerTool": diff --git a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py index 08966d005a..9d480658ab 100644 --- a/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/pr_creator_tool.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Callable, Dict, Optional, Union +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool from haystack.utils import Secret, deserialize_secrets_inplace @@ -98,7 +99,7 @@ def to_dict(self) -> Dict[str, Any]: "outputs_to_state": self.outputs_to_state, } serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) - return {"type": "haystack_integrations.tools.github.pr_creator_tool.GitHubPRCreatorTool", "data": serialized} + return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubPRCreatorTool": diff --git a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py index 88bf89623a..43a0b0c77a 100644 --- a/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py +++ b/integrations/github/src/haystack_integrations/tools/github/repo_viewer_tool.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from typing import Any, Callable, Dict, Optional, Union +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import ComponentTool from haystack.utils 
import Secret, deserialize_secrets_inplace @@ -119,7 +120,7 @@ def to_dict(self) -> Dict[str, Any]: } serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string) - return {"type": "haystack_integrations.tools.github.repo_viewer_tool.GitHubRepoViewerTool", "data": serialized} + return {"type": generate_qualified_class_name(type(self)), "data": serialized} @classmethod def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoViewerTool": From 1ac5ba29931192e4e0a346214070fd1cc8712ad4 Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 28 May 2025 11:34:25 +0200 Subject: [PATCH 50/51] test with lowest supported version --- .github/workflows/github.yml | 7 +++++++ integrations/github/pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/github.yml b/.github/workflows/github.yml index 192857e978..21c9ba44a1 100644 --- a/.github/workflows/github.yml +++ b/.github/workflows/github.yml @@ -60,9 +60,16 @@ jobs: - name: Run tests run: hatch run cov-retry + - name: Run unit tests with lowest direct dependencies + run: | + hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt + hatch run uv pip install -r requirements_lowest_direct.txt + hatch run test -m "not integration" + - name: Nightly - run unit tests with Haystack main branch if: github.event_name == 'schedule' run: | + hatch env prune hatch run uv pip install git+https://github.com/deepset-ai/haystack.git@main hatch run cov-retry -m "not integration" diff --git a/integrations/github/pyproject.toml b/integrations/github/pyproject.toml index d3a8d0fce5..48ea0034db 100644 --- a/integrations/github/pyproject.toml +++ b/integrations/github/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai"] +dependencies = ["haystack-ai>=2.12.0"] [project.urls] Source 
= "https://github.com/deepset-ai/haystack-core-integrations/github" From fc027f58c67a7deaa792be963b2a7992a021a4af Mon Sep 17 00:00:00 2001 From: Julian Risch Date: Wed, 28 May 2025 11:49:19 +0200 Subject: [PATCH 51/51] don't test http_client_kwargs for compatibility with Haystack 2.12 --- integrations/github/tests/test_file_editor_tool.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/integrations/github/tests/test_file_editor_tool.py b/integrations/github/tests/test_file_editor_tool.py index c1b8d6aefe..f590061786 100644 --- a/integrations/github/tests/test_file_editor_tool.py +++ b/integrations/github/tests/test_file_editor_tool.py @@ -141,7 +141,15 @@ def test_pipeline_serialization(self, monkeypatch): pipeline_dict = pipeline.to_dict() - assert pipeline_dict == { + # Remove http_client_kwargs from both dictionaries if it exists + # We don't want to test the http_client_kwargs because Haystack 2.12.0 doesn't have it + # Only Haystack 2.13.0+ has it + if "components" in pipeline_dict: + agent_params = pipeline_dict["components"]["agent"]["init_parameters"]["chat_generator"]["init_parameters"] + if "http_client_kwargs" in agent_params: + del agent_params["http_client_kwargs"] + + expected_dict = { "metadata": {}, "max_runs_per_component": 100, "components": { @@ -161,7 +169,6 @@ def test_pipeline_serialization(self, monkeypatch): "max_retries": None, "tools": None, "tools_strict": False, - "http_client_kwargs": None, }, }, "tools": [ @@ -194,6 +201,8 @@ def test_pipeline_serialization(self, monkeypatch): "connection_type_validation": True, } + assert pipeline_dict == expected_dict + deserialized_pipeline = Pipeline.from_dict(pipeline_dict) deserialized_components = [instance for _, instance in deserialized_pipeline.graph.nodes(data="instance")] deserialized_agent = deserialized_components[0]