Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
venv
.idea
dist/
weaviate_client.egg-info
*.egg-info/
**/__pycache__
tmp
build/
Expand Down
85 changes: 85 additions & 0 deletions mock_tests/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,44 @@ def test_client_credentials(weaviate_auth_mock: HTTPServer, start_grpc_server: g
weaviate_auth_mock.check_assertions()


@pytest.mark.asyncio
async def test_client_credentials_refresh_async(
    weaviate_auth_mock: HTTPServer, start_grpc_server: grpc.Server
) -> None:
    """Exercise the async refresher when the token has no refresh token attached.

    A client-credentials grant only yields an access token, so once it expires the
    refresher has to obtain a brand-new token from the stored credentials.
    """
    token_hits = []  # grows by one entry for every request served by the token endpoint

    def issue_token(request: Request) -> Response:
        token_hits.append(1)
        body = json.dumps({"access_token": ACCESS_TOKEN, "expires_in": 1})
        return Response(body, content_type="application/json")

    weaviate_auth_mock.expect_request("/auth").respond_with_handler(issue_token)
    schema_route = weaviate_auth_mock.expect_request(
        "/v1/schema", headers={"Authorization": "Bearer " + ACCESS_TOKEN}
    )
    schema_route.respond_with_json({"classes": []})

    credentials = weaviate.auth.AuthClientCredentials(client_secret=CLIENT_SECRET, scope=SCOPE)
    async with weaviate.use_async_with_local(
        host=MOCK_IP,
        port=MOCK_PORT,
        grpc_port=MOCK_PORT_GRPC,
        auth_credentials=credentials,
    ) as client:
        await client.collections.list_all()
        baseline = len(token_hits)
        # refresh interval is max(expires_in - 30, 1) -> 1s, so 3s leaves room for a refresh
        await asyncio.sleep(3)
        # more token requests than before: a fresh token was fetched with the credentials
        assert len(token_hits) > baseline


@pytest.mark.parametrize("header_name", ["Authorization", "authorization"])
def test_auth_header_priority(
recwarn, weaviate_auth_mock: HTTPServer, start_grpc_server: grpc.Server, header_name: str
Expand Down Expand Up @@ -183,6 +221,53 @@ async def test_refresh_async(
weaviate_auth_mock.check_assertions()


@pytest.mark.asyncio
async def test_async_auth_starts_no_threads(
    weaviate_auth_mock: HTTPServer, start_grpc_server: grpc.Server
) -> None:
    """Token refresh in the async client has to be an asyncio task, never a thread.

    Threads cannot be started at all under WASM/Pyodide, so both the TokenRefresh daemon
    thread and the event-loop sidecar thread would crash connect() for every async OIDC
    flow.
    """
    import threading

    bearer_header = {"Authorization": "Bearer " + ACCESS_TOKEN}
    weaviate_auth_mock.expect_request("/v1/schema", headers=bearer_header).respond_with_json(
        {"classes": []}
    )
    token_payload = {
        "access_token": ACCESS_TOKEN,
        "expires_in": 500,
        "refresh_token": REFRESH_TOKEN,
    }
    weaviate_auth_mock.expect_request("/auth").respond_with_json(token_payload)

    # Snapshot thread OBJECTS rather than names: stale TokenRefresh daemon threads left
    # alive by earlier sync tests would hide a regression if only name sets were compared.
    preexisting_threads = set(threading.enumerate())
    preexisting_tasks = asyncio.all_tasks()
    credentials = weaviate.auth.AuthBearerToken(
        ACCESS_TOKEN, refresh_token=REFRESH_TOKEN, expires_in=500
    )
    async with weaviate.use_async_with_local(
        host=MOCK_IP,
        port=MOCK_PORT,
        grpc_port=MOCK_PORT_GRPC,
        auth_credentials=credentials,
    ) as client:
        await client.collections.list_all()

        started_threads = set(threading.enumerate()) - preexisting_threads
        started_names = {thread.name for thread in started_threads}
        assert "TokenRefresh" not in started_names
        assert "eventLoop" not in started_names

        started_tasks = asyncio.all_tasks() - preexisting_tasks
        refresh_tasks = [
            task for task in started_tasks if "token_refresh" in repr(task.get_coro())
        ]
        # the refresher runs as exactly one asyncio task instead of a thread
        assert len(refresh_tasks) == 1

    # close() must cancel the refresher rather than leak it; a single wait gives the
    # cancellation a chance to land before the task state is inspected
    await asyncio.wait(refresh_tasks, timeout=1)
    assert refresh_tasks[0].done()


def test_refresh_of_refresh(weaviate_auth_mock: HTTPServer, start_grpc_server: grpc.Server) -> None:
"""Test that refresh tokens are used to get a new refresh token token."""
weaviate_auth_mock.expect_request(
Expand Down
98 changes: 98 additions & 0 deletions packages/grpc-web/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# weaviate-python-grpc-web

A grpc-web / WebAssembly (Pyodide) transport for the
[Weaviate Python client](https://github.com/weaviate/weaviate-python-client), so the
client's **async** gRPC data path can run inside a browser (marimo notebooks, Pyodide,
WASM workers) where there is no socket and no `grpcio` wheel.

It is built from the same repository as `weaviate-client` and reuses its generated
protobuf stubs — it does **not** fork code generation.

## How it works

Under Pyodide there is no `grpcio` Emscripten wheel, and `import weaviate` hard-imports
`grpc` at module load. This package installs a small pure-Python `grpc` shim into
`sys.modules` **before** `import weaviate`, which:

- satisfies every import-time `import grpc` / `from grpc(.aio) import ...` in the base
client and its generated `*_pb2_grpc` stubs;
- provides `grpc.aio.Channel` as a real base class, so the grpc-web channel
(`GrpcWebChannel`) subclasses it and the client's `isinstance(..., grpc.aio.Channel)`
assertions pass;
- satisfies the generated v6300 stub's version gate
(`grpc.__version__` / `grpc._utilities.first_version_is_lower`).

The `GrpcWebChannel` frames unary RPCs as grpc-web (a 5-byte header + protobuf payload)
and POSTs them via `pyodide.http.pyfetch` to a server fronted by a grpc-web transcoder
(e.g. Envoy or [connectrpc/vanguard](https://github.com/connectrpc/vanguard-go)). Call
metadata (API key / OIDC bearer) is folded into `fetch` headers.

For REST, Pyodide ≥ 0.27 distributes a patched httpx that already routes through the
browser's `fetch` natively — when that build is detected, the package leaves it alone.
Only when httpx was resolved from PyPI (httpcore + raw sockets, which cannot work under
WASM) does the package patch `httpx.AsyncHTTPTransport` with its own pyfetch-based
transport.

## Usage

```python
import weaviate_grpc_web # installs the grpc shim under Emscripten (no-op elsewhere)
import weaviate

client = weaviate.use_async_with_local(skip_init_checks=True)
await client.connect()
collection = client.collections.get("Article")
await collection.query.near_text("hello", limit=3)
```

## Supported / unsupported

| Feature | Kind | Status |
|----------------------------------------------------------|-----------------|--------|
| Search, Aggregate, TenantsGet, BatchObjects, BatchDelete | unary gRPC | ✅ works over grpc-web |
| Health check (`/grpc.health.v1.Health/Check`) | unary gRPC | ✅ (recommend `skip_init_checks=True` + REST `/.well-known/ready`) |
| REST (`is_ready`, config, `/batch/references`, …) | REST | ✅ via fetch (Pyodide's httpx build, or this package's fallback transport) |
| API-key auth (`Auth.api_key`) | header | ✅ |
| OIDC auth (`client_credentials` / `client_password` / `bearer_token`) | REST | ✅ token fetch + asyncio-task refresh (no threads) |
| Bulk insert: `collection.data.insert_many()` | unary gRPC | ✅ the supported bulk path under WASM |
| `batch.stream()` / `batch.experimental()` (BatchStream) | bidi streaming | ❌ not possible over grpc-web/fetch — raises immediately; use `insert_many()` |
| `batch.dynamic()` / `fixed_size()` / `rate_limit()` | sync-client API | ❌ these only exist on the sync client, which is unsupported under WASM |
| Embedded Weaviate (`use_async_with_embedded`) | subprocess | ❌ raises "not supported under WebAssembly/Pyodide" |
| Synchronous client | — | ❌ async-only under WASM |
| Weaviate Agents: `AsyncQueryAgent` `run/ask/search` | REST | ✅ via fetch |
| Weaviate Agents: `ask_stream` / `research_stream` (SSE) | REST streaming | ⚠️ degraded under the fallback transport: fully buffered, events arrive only when the run completes (and long runs can hit the request timeout) |
| Weaviate Agents: sync `QueryAgent`, `TransformationAgent`, `PersonalizationAgent` | REST sync | ❌ no async flavour exists |

## Configuration not honored in the browser

`fetch` manages connections itself, so several knobs are accepted but have no effect
under WASM:

- `AdditionalConfig.proxies` / `trust_env` proxy environment variables (the browser
cannot proxy fetch requests per-client),
- connection-pool sizing and `session_pool_max_retries`,
- `GrpcConfig.credentials` (custom CA bundles — the browser's trust store decides TLS),
- `GrpcConfig.channel_options`, including `grpc.max_send/receive_message_length`
(only `grpc-web.path_prefix` is consumed),
- `Proxies.grpc` / `GRPC_PROXY`.

## CORS requirements (browsers)

Cross-origin browser deployments must configure the grpc-web transcoder / REST endpoint
with CORS, or failures become hard to diagnose:

- allow the request headers the client sends: `authorization`, `content-type`,
`x-grpc-web`, and any custom headers;
- expose the grpc-web status headers on responses:
`Access-Control-Expose-Headers: grpc-status, grpc-message` — without this,
trailers-only error responses (e.g. a bad API key) are reported as
`INTERNAL: grpc-web response contained no message frame` instead of the real error;
- note that a CORS-blocked request is indistinguishable from a network failure in the
browser (`TypeError: Failed to fetch`), and is retried as UNAVAILABLE.

## Testing on CPython

`weaviate_grpc_web.install(force=True)` installs the shim on a normal CPython
interpreter (run it in a fresh process, before importing `weaviate`). Inject a sender
with `weaviate_grpc_web.set_sender(...)` (e.g. `make_httpx_sender()`) to exercise the
transport against an Envoy/vanguard transcoder without a browser.
30 changes: 30 additions & 0 deletions packages/grpc-web/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[build-system]
requires = ["setuptools>=65", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "weaviate-python-grpc-web"
description = "grpc-web / WASM (Pyodide) transport for the Weaviate Python client"
readme = "README.md"
requires-python = ">=3.10"
license = { text = "BSD-3-Clause" }
authors = [{ name = "Weaviate", email = "hello@weaviate.io" }]
keywords = ["weaviate", "grpc-web", "pyodide", "wasm", "emscripten"]
# Version is meant to stay in lockstep with weaviate-client (not enforced yet).
# TODO(lockstep): derive from the same git tag via setuptools_scm and assert the built
# versions match in CI before publishing.
version = "0.0.1.dev0"
# Deliberately depends on weaviate-client WITHOUT grpcio (grpcio is excluded under
# Emscripten by the `sys_platform != "emscripten"` marker in the base package's deps).
dependencies = [
"weaviate-client",
]

[project.urls]
Source = "https://github.com/weaviate/weaviate-python-client"
Tracker = "https://github.com/weaviate/weaviate-python-client/issues"

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
weaviate_grpc_web = ["py.typed"]
64 changes: 64 additions & 0 deletions packages/grpc-web/src/weaviate_grpc_web/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""grpc-web / WASM transport for the Weaviate Python client.

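Under Pyodide/Emscripten there is no ``grpcio`` wheel. Importing this package installs a
pure-Python ``grpc`` shim into ``sys.modules`` (and defaults protobuf to its pure-Python
runtime unless ``PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION`` is already set) so that the
subsequent ``import weaviate`` succeeds and its async gRPC data path runs over grpc-web
(``fetch``) instead of HTTP/2 sockets. It also calls ``install_fetch_transport()`` so the
REST path can go through ``fetch`` as well.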

Usage under Pyodide::

import weaviate_grpc_web # installs the grpc shim (no-op off Emscripten)
import weaviate

client = weaviate.use_async_with_local(skip_init_checks=True)
await client.connect()

The shim is installed automatically only under Emscripten, so importing this package on a
normal CPython install never clobbers a real, working ``grpcio``. Async clients only —
the synchronous client is not supported in the browser.
"""

import os
import sys

from ._shim import StatusCode, install, is_installed

# Public API of the package. ``install``, ``is_installed`` and ``StatusCode`` are imported
# from ``._shim`` above; the remaining names are bound by the imports that follow the
# ``_bootstrap()`` call further down in this module.
__all__ = [
"install",
"is_installed",
"install_fetch_transport",
"uninstall_fetch_transport",
"is_fetch_transport_installed",
"set_sender",
"make_httpx_sender",
"GrpcWebChannel",
"StatusCode",
]


def _bootstrap() -> None:
    """Prepare the interpreter for ``import weaviate`` when running under Emscripten.

    On any other platform this returns immediately without touching the environment,
    ``sys.modules`` or httpx.
    """
    if sys.platform != "emscripten":
        return

    # Default to the pure-Python protobuf runtime: it always works, whereas the upb
    # C-extension may be missing. This has to happen before ``import weaviate`` (which
    # imports protobuf) to take effect; ``setdefault`` keeps an explicit user override.
    os.environ.setdefault("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python")
    install()

    # REST needs fetch as well, because httpx/httpcore open raw sockets and those do not
    # exist under WASM. The import is deferred to this branch so that plain CPython
    # imports stay light.
    from ._httpx_fetch import install_fetch_transport

    install_fetch_transport()


_bootstrap()

# Imported after the bootstrap. These modules pull their grpc base classes directly from
# ``._shim`` (not via ``sys.modules['grpc']``), so importing them is safe regardless of
# whether the shim was installed.
from ._channel import GrpcWebChannel, set_sender # noqa: E402
from ._httpx_fetch import ( # noqa: E402
install_fetch_transport,
is_fetch_transport_installed,
uninstall_fetch_transport,
)
from ._sender import make_httpx_sender # noqa: E402
Loading
Loading