From 6ee736d936e46811476bad1df4f1cb73549581f9 Mon Sep 17 00:00:00 2001 From: Kethan Goparapu Date: Wed, 20 May 2026 00:58:01 -0400 Subject: [PATCH] docs: add server Python API examples --- docs/source/build-with-bentoml/clients.rst | 62 ++++++++++++++++++++++ docs/source/reference/bentoml/index.rst | 1 + docs/source/reference/bentoml/server.rst | 17 ++++++ 3 files changed, 80 insertions(+) create mode 100644 docs/source/reference/bentoml/server.rst diff --git a/docs/source/build-with-bentoml/clients.rst b/docs/source/build-with-bentoml/clients.rst index a535c81259d..bad988ddd1f 100644 --- a/docs/source/build-with-bentoml/clients.rst +++ b/docs/source/build-with-bentoml/clients.rst @@ -99,6 +99,68 @@ To enhance resource management and reduce the risk of connection leaks, we recom summarized_text: str = await client.summarize(text="Your long text to summarize") print(summarized_text) +Start a local server from Python +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For integration tests or local automation, you can start a BentoML server from Python +and create a client against the running server. For new code, prefer +``bentoml.serve()`` as a context manager: + +.. code-block:: python + + import bentoml + + with bentoml.serve("service.py:Summarization", port=3000) as server: + with bentoml.SyncHTTPClient(server.url, server_ready_timeout=60) as client: + summarized_text: str = client.summarize(text="Your long text to summarize") + print(summarized_text) + +If you maintain code that uses the lower-level server classes, start the server before +calling ``get_client()`` and stop it once you have finished making requests. The +``bentoml.server`` module is deprecated, so use this path only when you need direct +access to the server class: + +.. 
code-block:: python + + from bentoml.server import HTTPServer + + server = HTTPServer( + "service.py:Summarization", + production=True, + host="127.0.0.1", + port=3000, + ) + server.start(text=True) + + try: + client = server.get_client() + summarized_text: str = client.summarize(text="Your long text to summarize") + print(summarized_text) + finally: + server.stop() + +Use ``GrpcServer`` the same way for a gRPC endpoint: + +.. code-block:: python + + from bentoml.server import GrpcServer + + server = GrpcServer( + "service.py:Summarization", + production=True, + host="127.0.0.1", + port=3000, + enable_reflection=True, + ) + server.start(text=True) + + try: + client = server.get_client() + summarized_text: str = client.summarize(text="Your long text to summarize") + print(summarized_text) + finally: + server.stop() + Check Service readiness ----------------------- diff --git a/docs/source/reference/bentoml/index.rst b/docs/source/reference/bentoml/index.rst index 4f9163fcfe2..01bc9e9b3f9 100644 --- a/docs/source/reference/bentoml/index.rst +++ b/docs/source/reference/bentoml/index.rst @@ -9,6 +9,7 @@ This section contains detailed API specifications. Use them to dig deeper into B stores sdk + server bento-build-options cli client diff --git a/docs/source/reference/bentoml/server.rst b/docs/source/reference/bentoml/server.rst new file mode 100644 index 00000000000..d95a8a227b2 --- /dev/null +++ b/docs/source/reference/bentoml/server.rst @@ -0,0 +1,17 @@ +=========== +Server APIs +=========== + +The ``bentoml.server`` module provides lower-level server classes for existing code +that needs direct control over server startup and shutdown. + +.. warning:: + + The ``bentoml.server`` module is deprecated. For new code, use + ``bentoml.serve()`` as shown in :doc:`/build-with-bentoml/clients`. + +.. autoclass:: bentoml.server.HTTPServer + :members: start, get_client, stop + +.. autoclass:: bentoml.server.GrpcServer + :members: start, get_client, stop