Skip to content

Commit 60694c8

Browse files
authored
Support GCS paths for server and tool instructions (#183)
1 parent 00f731b commit 60694c8

7 files changed

Lines changed: 134 additions & 25 deletions

File tree

packages/datacommons-mcp/.env.sample

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ DC_TYPE=base
5050
# Path to directory containing markdown file overrides for server instructions and/or tool descriptions.
5151
# Supports partial overrides: only create files for the specific instructions or tools you want to replace.
5252
# The system will fall back to package defaults for any file not found here.
53+
# Supports both local filesystem paths and Google Cloud Storage paths (e.g., gs://bucket/path).
5354
#
5455
# Expected structure inside this directory:
5556
# - server.md

packages/datacommons-mcp/datacommons_mcp/app.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434

3535
MCP_SERVER_NAME = "DC MCP Server"
3636
DEFAULT_INSTRUCTIONS_PACKAGE = "datacommons_mcp.instructions"
37-
SERVER_INSTRUCTION_FILE = "server.md"
37+
SERVER_INSTRUCTIONS_FILE = "server.md"
3838

3939

4040
class DCApp:
@@ -62,15 +62,15 @@ def __init__(self) -> None:
6262
raise
6363

6464
# Load Server Instructions
65-
server_instructions = self._load_instruction(SERVER_INSTRUCTION_FILE)
65+
server_instructions = self._load_instructions(SERVER_INSTRUCTIONS_FILE)
6666

6767
self.mcp = FastMCP(
6868
MCP_SERVER_NAME,
6969
version=__version__,
7070
instructions=server_instructions,
7171
)
7272

73-
def _load_instruction(self, filename: str) -> str:
73+
def _load_instructions(self, filename: str) -> str:
7474
"""
7575
Loads markdown content.
7676
Priority:
@@ -82,13 +82,13 @@ def _load_instruction(self, filename: str) -> str:
8282
content = read_external_content(self.settings.instructions_dir, filename)
8383
if content is not None:
8484
logger.info(
85-
"Loaded custom instruction for %s from %s",
85+
"Loaded custom instructions for %s from %s",
8686
filename,
8787
self.settings.instructions_dir,
8888
)
8989
return content
9090
logger.debug(
91-
"Custom instruction file %s not found in %s, falling back to default.",
91+
"Custom instructions file %s not found in %s, falling back to default.",
9292
filename,
9393
self.settings.instructions_dir,
9494
)
@@ -101,9 +101,9 @@ def register_tool(self, func: Callable[..., Any], instruction_file: str) -> None
101101
102102
Args:
103103
func: The tool function to register.
104-
instruction_file: Path to instruction file relative to instructions dir.
104+
instruction_file: Path to instructions file relative to instructions dir.
105105
"""
106-
description = self._load_instruction(instruction_file)
106+
description = self._load_instructions(instruction_file)
107107
if not description:
108108
logger.warning(
109109
"No description found for tool %s from file %s",

packages/datacommons-mcp/datacommons_mcp/utils.py

Lines changed: 55 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,14 @@
1414

1515
import importlib.resources
1616
import logging
17+
from functools import cache
1718
from pathlib import Path
19+
from typing import TYPE_CHECKING
1820

1921
import requests
22+
23+
if TYPE_CHECKING:
24+
from google.cloud import storage
2025
from datacommons_client.models.observation import Observation
2126

2227
from datacommons_mcp.data_models.observations import DateRange, ObservationDate
@@ -93,33 +98,68 @@ def filter_by_date(
9398
return filtered_list
9499

95100

96-
def read_external_content(base_path: str, filename: str) -> str | None:
97-
"""Reads content from an external location (currently only local paths).
101+
@cache
102+
def _get_gcs_client() -> "storage.Client":
103+
"""Returns a cached GCS client instance."""
104+
# Local import to avoid loading the module unless GCS is required
105+
from google.cloud import storage
98106

99-
Args:
100-
base_path: The base directory to look in.
101-
filename: The name of the file to read (relative to base_path). Can include
102-
subdirectories (e.g. "tools/search_indicators.md").
107+
return storage.Client()
103108

104-
Returns:
105-
The content of the file as a string, or None if the file does not exist
106-
or cannot be read.
107109

108-
Example:
109-
>>> content = read_external_content("/path/to/instructions", "server.md")
110-
"""
111-
# TODO(keyurs): Add support for GCS if needed. This is useful for Custom DCs deployed in the cloud.
110+
def _read_local_content(path: Path) -> str | None:
111+
"""Reads content from a local file path."""
112112
try:
113-
path = Path(base_path) / filename
114113
if path.exists() and path.is_file():
115114
return path.read_text(encoding="utf-8")
116115
except Exception as e:
116+
logger.warning("Failed to read local file %s: %s", path, e)
117+
return None
118+
119+
120+
def _read_gcs_content(uri: str) -> str | None:
121+
"""Reads content from a GCS blob URI."""
122+
from google.cloud import storage
123+
from google.cloud.exceptions import NotFound
124+
125+
try:
126+
client = _get_gcs_client()
127+
# Create the blob object directly from the URI
128+
blob = storage.Blob.from_string(uri, client=client)
129+
return blob.download_as_text(encoding="utf-8")
130+
except NotFound:
117131
logger.warning(
118-
"Failed to read external instruction %s from %s: %s", filename, base_path, e
132+
"GCS blob %s not found. Falling back to default.",
133+
uri,
134+
)
135+
return None
136+
except Exception as e:
137+
logger.warning(
138+
"Failed to read GCS blob %s: %s",
139+
uri,
140+
e,
119141
)
120142
return None
121143

122144

145+
def read_external_content(base_path: str, filename: str) -> str | None:
146+
"""Reads content from an external location (local or GCS).
147+
148+
Args:
149+
base_path: The base directory or GCS path (gs://bucket/prefix) to look in.
150+
filename: The name of the file to read (relative to base_path).
151+
152+
Returns:
153+
The content of the file as a string, or None if the file does not exist.
154+
"""
155+
if base_path.startswith("gs://"):
156+
uri = f"{base_path.rstrip('/')}/{filename}"
157+
return _read_gcs_content(uri)
158+
159+
path = Path(base_path) / filename
160+
return _read_local_content(path)
161+
162+
123163
def read_package_content(package: str, filename: str) -> str:
124164
"""Reads content from the package resources.
125165

packages/datacommons-mcp/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ dependencies = [
1414
"pydantic-settings",
1515
"python-dateutil>=2.9.0.post0",
1616
"python-dotenv>=1.1.1",
17+
"google-cloud-storage",
1718
]
1819
urls = {Homepage = "https://github.com/datacommonsorg/agent-toolkit"}
1920
license = {file = "LICENSE"}

packages/datacommons-mcp/tests/test_app.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ def test_load_instruction_tool_override(mock_settings, tmp_path, create_test_fil
9696
from datacommons_mcp.app import DCApp
9797

9898
app = DCApp()
99-
content = app._load_instruction("tools/test_tool.md")
99+
content = app._load_instructions("tools/test_tool.md")
100100
assert content == "Custom Tool Instructions"
101101

102102

@@ -114,7 +114,7 @@ def test_load_instruction_fallback(mock_settings, tmp_path):
114114
app = DCApp()
115115

116116
# Should fall back to default package resource (server.md exists in package)
117-
content = app._load_instruction("server.md")
117+
content = app._load_instructions("server.md")
118118
assert "Data Commons" in content
119119

120120

packages/datacommons-mcp/tests/test_utils.py

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,16 @@
1313
# limitations under the License.
1414

1515

16+
from unittest.mock import MagicMock, patch
17+
1618
import pytest
1719
import requests
1820
from datacommons_client.models.observation import Observation
1921
from datacommons_mcp.data_models.observations import DateRange
2022
from datacommons_mcp.exceptions import APIKeyValidationError, InvalidAPIKeyError
2123
from datacommons_mcp.utils import (
2224
VALIDATION_API_PATH,
25+
_get_gcs_client,
2326
filter_by_date,
2427
read_external_content,
2528
read_package_content,
@@ -85,6 +88,22 @@ def test_validate_api_key_network_error(self, requests_mock):
8588

8689

8790
class TestReadContent:
91+
@pytest.fixture
92+
def mock_gcs(self):
93+
with (
94+
patch("google.cloud.storage.Client") as mock_client_class,
95+
patch("google.cloud.storage.Blob.from_string") as mock_from_string,
96+
):
97+
_get_gcs_client.cache_clear()
98+
mock_client = MagicMock()
99+
mock_client_class.return_value = mock_client
100+
101+
mock_blob = MagicMock()
102+
mock_from_string.return_value = mock_blob
103+
mock_blob.download_as_text.return_value = "gcs content"
104+
105+
yield mock_client, mock_from_string, mock_blob
106+
88107
def test_read_external_content_success(self, tmp_path, create_test_file):
89108
create_test_file("test.md", "content")
90109
assert read_external_content(str(tmp_path), "test.md") == "content"
@@ -97,6 +116,52 @@ def test_read_external_content_subdir(self, tmp_path, create_test_file):
97116
def test_read_external_content_missing(self, tmp_path):
98117
assert read_external_content(str(tmp_path), "missing.md") is None
99118

119+
def test_read_external_content_gcs_success(self, mock_gcs):
120+
mock_client, mock_from_string, mock_blob = mock_gcs
121+
mock_blob.download_as_text.return_value = "custom content 1"
122+
123+
content = read_external_content("gs://my-bucket/path", "test.md")
124+
125+
assert content == "custom content 1"
126+
mock_from_string.assert_called_once_with(
127+
"gs://my-bucket/path/test.md", client=mock_client
128+
)
129+
130+
def test_read_external_content_gcs_success_no_prefix(self, mock_gcs):
131+
mock_client, mock_from_string, mock_blob = mock_gcs
132+
mock_blob.download_as_text.return_value = "custom content 2"
133+
134+
content = read_external_content("gs://my-bucket", "test.md")
135+
136+
assert content == "custom content 2"
137+
mock_from_string.assert_called_once_with(
138+
"gs://my-bucket/test.md", client=mock_client
139+
)
140+
141+
def test_read_external_content_gcs_not_found(self, mock_gcs):
142+
from google.cloud.exceptions import NotFound
143+
144+
mock_client, mock_from_string, mock_blob = mock_gcs
145+
mock_blob.download_as_text.side_effect = NotFound("Blob not found")
146+
147+
content = read_external_content("gs://my-bucket", "test.md")
148+
149+
assert content is None
150+
mock_from_string.assert_called_once_with(
151+
"gs://my-bucket/test.md", client=mock_client
152+
)
153+
154+
def test_read_external_content_gcs_failure(self, mock_gcs):
155+
mock_client, mock_from_string, mock_blob = mock_gcs
156+
mock_blob.download_as_text.side_effect = Exception("GCS error")
157+
158+
content = read_external_content("gs://my-bucket", "test.md")
159+
160+
assert content is None
161+
mock_from_string.assert_called_once_with(
162+
"gs://my-bucket/test.md", client=mock_client
163+
)
164+
100165
def test_read_package_content_success(self):
101166
# Read actual content from the package
102167
content = read_package_content("datacommons_mcp.instructions", "server.md")

uv.lock

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)