Skip to content

Commit d2e1129

Browse files
[2/3] sdks/python: enrich data with Milvus Search [Vector, Keyword, Hybrid] (#35577)
* sdks/python: mark `milvus` as extra dependency
* sdks/python: mark milvus itests with `require_docker_in_docker`
* .github: trigger postcommit python
* .github: trigger postcommit python
* .github: trigger postcommit python
* sdks/python: fix linting issues
* .github: update beam postcommit python
* sdks/python: fix linting issues
* workflows: trigger postcommit python
* sdks/python: update image requirements
* workflows: trigger postcommit python
1 parent 7ecbb2a commit d2e1129

13 files changed

Lines changed: 330 additions & 332 deletions
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"comment": "Modify this file in a trivial way to cause this test suite to run.",
3-
"modification": 101
3+
"modification": 35
44
}
55

sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,16 @@
2727
from typing import Union
2828

2929
from google.protobuf.json_format import MessageToDict
30+
31+
from apache_beam.ml.rag.types import Chunk
32+
from apache_beam.ml.rag.types import Embedding
33+
from apache_beam.transforms.enrichment import EnrichmentSourceHandler
3034
from pymilvus import AnnSearchRequest
3135
from pymilvus import Hit
3236
from pymilvus import Hits
3337
from pymilvus import MilvusClient
3438
from pymilvus import SearchResult
3539

36-
from apache_beam.ml.rag.types import Chunk
37-
from apache_beam.ml.rag.types import Embedding
38-
from apache_beam.transforms.enrichment import EnrichmentSourceHandler
39-
4040

4141
class SearchStrategy(Enum):
4242
"""Search strategies for information retrieval.

sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,6 @@
3434

3535
import pytest
3636
import yaml
37-
from pymilvus import CollectionSchema
38-
from pymilvus import DataType
39-
from pymilvus import FieldSchema
40-
from pymilvus import Function
41-
from pymilvus import FunctionType
42-
from pymilvus import MilvusClient
43-
from pymilvus import RRFRanker
44-
from pymilvus.milvus_client import IndexParams
45-
from testcontainers.core.config import MAX_TRIES as TC_MAX_TRIES
46-
from testcontainers.core.config import testcontainers_config
47-
from testcontainers.core.generic import DbContainer
48-
from testcontainers.milvus import MilvusContainer
4937

5038
import apache_beam as beam
5139
from apache_beam.ml.rag.types import Chunk
@@ -54,7 +42,21 @@
5442
from apache_beam.testing.test_pipeline import TestPipeline
5543
from apache_beam.testing.util import assert_that
5644

45+
# pylint: disable=ungrouped-imports
5746
try:
47+
from pymilvus import (
48+
CollectionSchema,
49+
DataType,
50+
FieldSchema,
51+
Function,
52+
FunctionType,
53+
MilvusClient,
54+
RRFRanker)
55+
from pymilvus.milvus_client import IndexParams
56+
from testcontainers.core.config import MAX_TRIES as TC_MAX_TRIES
57+
from testcontainers.core.config import testcontainers_config
58+
from testcontainers.core.generic import DbContainer
59+
from testcontainers.milvus import MilvusContainer
5860
from apache_beam.transforms.enrichment import Enrichment
5961
from apache_beam.ml.rag.enrichment.milvus_search import (
6062
MilvusSearchEnrichmentHandler,
@@ -467,7 +469,7 @@ def create_user_yaml(service_port: int, max_vector_field_num=5):
467469
os.remove(path)
468470

469471

470-
@pytest.mark.uses_testcontainer
472+
@pytest.mark.require_docker_in_docker
471473
@unittest.skipUnless(
472474
platform.system() == "Linux",
473475
"Test runs only on Linux due to lack of support, as yet, for nested "
@@ -483,22 +485,16 @@ class TestMilvusSearchEnrichment(unittest.TestCase):
483485

484486
@classmethod
485487
def setUpClass(cls):
486-
try:
487-
cls._db = MilvusEnrichmentTestHelper.start_db_container(
488-
cls._version, vector_client_max_retries=1, tc_max_retries=1)
489-
cls._connection_params = MilvusConnectionParameters(
490-
uri=cls._db.uri,
491-
user=cls._db.user,
492-
password=cls._db.password,
493-
db_id=cls._db.id,
494-
token=cls._db.token)
495-
cls._collection_load_params = MilvusCollectionLoadParameters()
496-
cls._collection_name = MilvusEnrichmentTestHelper.initialize_db_with_data(
497-
cls._connection_params)
498-
except Exception as e:
499-
pytest.skip(
500-
f"Skipping all tests in {cls.__name__} due to DB startup failure: {e}"
501-
)
488+
cls._db = MilvusEnrichmentTestHelper.start_db_container(cls._version)
489+
cls._connection_params = MilvusConnectionParameters(
490+
uri=cls._db.uri,
491+
user=cls._db.user,
492+
password=cls._db.password,
493+
db_id=cls._db.id,
494+
token=cls._db.token)
495+
cls._collection_load_params = MilvusCollectionLoadParameters()
496+
cls._collection_name = MilvusEnrichmentTestHelper.initialize_db_with_data(
497+
cls._connection_params)
502498

503499
@classmethod
504500
def tearDownClass(cls):

sdks/python/container/py310/base_image_requirements.txt

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,25 @@
2323

2424
aiofiles==24.1.0
2525
aiohappyeyeballs==2.6.1
26-
aiohttp==3.12.14
26+
aiohttp==3.12.15
2727
aiosignal==1.4.0
2828
annotated-types==0.7.0
29-
anyio==4.9.0
29+
anyio==4.10.0
3030
asn1crypto==1.5.1
3131
async-timeout==5.0.1
3232
attrs==25.3.0
3333
backports.tarfile==1.2.0
3434
beautifulsoup4==4.13.4
3535
bs4==0.0.2
36-
build==1.2.2.post1
36+
build==1.3.0
3737
cachetools==5.5.2
38-
certifi==2025.7.14
38+
certifi==2025.8.3
3939
cffi==1.17.1
4040
charset-normalizer==3.4.2
4141
click==8.2.1
4242
cloud-sql-python-connector==1.18.3
4343
crcmod==1.7
44-
cryptography==45.0.5
44+
cryptography==45.0.6
4545
Cython==3.1.2
4646
dill==0.3.1.1
4747
dnspython==2.7.0
@@ -50,26 +50,26 @@ docopt==0.6.2
5050
docstring_parser==0.17.0
5151
exceptiongroup==1.3.0
5252
execnet==2.1.1
53-
fastavro==1.11.1
53+
fastavro==1.12.0
5454
fasteners==0.19
55-
freezegun==1.5.3
55+
freezegun==1.5.4
5656
frozenlist==1.7.0
5757
future==1.0.0
5858
google-api-core==2.25.1
59-
google-api-python-client==2.177.0
59+
google-api-python-client==2.178.0
6060
google-apitools==0.5.31
6161
google-auth==2.40.3
6262
google-auth-httplib2==0.2.0
63-
google-cloud-aiplatform==1.105.0
63+
google-cloud-aiplatform==1.108.0
6464
google-cloud-bigquery==3.35.1
6565
google-cloud-bigquery-storage==2.32.0
66-
google-cloud-bigtable==2.31.0
66+
google-cloud-bigtable==2.32.0
6767
google-cloud-core==2.4.3
6868
google-cloud-datastore==2.21.0
6969
google-cloud-dlp==3.31.0
7070
google-cloud-language==2.17.2
7171
google-cloud-profiler==4.1.0
72-
google-cloud-pubsub==2.31.0
72+
google-cloud-pubsub==2.31.1
7373
google-cloud-pubsublite==1.12.0
7474
google-cloud-recommendations-ai==0.10.18
7575
google-cloud-resource-manager==1.14.2
@@ -78,10 +78,10 @@ google-cloud-storage==2.19.0
7878
google-cloud-videointelligence==2.16.2
7979
google-cloud-vision==3.10.2
8080
google-crc32c==1.7.1
81-
google-genai==1.27.0
81+
google-genai==1.29.0
8282
google-resumable-media==2.7.2
8383
googleapis-common-protos==1.70.0
84-
greenlet==3.2.3
84+
greenlet==3.2.4
8585
grpc-google-iam-v1==0.14.2
8686
grpc-interceptor==0.15.4
8787
grpcio==1.65.5
@@ -92,7 +92,7 @@ hdfs==2.7.3
9292
httpcore==1.0.9
9393
httplib2==0.22.0
9494
httpx==0.28.1
95-
hypothesis==6.136.4
95+
hypothesis==6.137.1
9696
idna==3.10
9797
importlib_metadata==8.7.0
9898
iniconfig==2.1.0
@@ -108,8 +108,8 @@ jsonschema-specifications==2025.4.1
108108
keyring==25.6.0
109109
keyrings.google-artifactregistry-auth==1.1.2
110110
MarkupSafe==3.0.2
111-
milvus-lite==2.5.1
112-
mmh3==5.1.0
111+
milvus==2.3.5
112+
mmh3==5.2.0
113113
mock==5.2.0
114114
more-itertools==10.7.0
115115
multidict==6.6.3
@@ -118,17 +118,17 @@ nltk==3.9.1
118118
numpy==2.2.6
119119
oauth2client==4.1.3
120120
objsize==0.7.1
121-
opentelemetry-api==1.35.0
122-
opentelemetry-sdk==1.35.0
123-
opentelemetry-semantic-conventions==0.56b0
124-
oracledb==3.2.0
121+
opentelemetry-api==1.36.0
122+
opentelemetry-sdk==1.36.0
123+
opentelemetry-semantic-conventions==0.57b0
124+
oracledb==3.3.0
125125
orjson==3.11.1
126126
overrides==7.7.0
127127
packaging==25.0
128128
pandas==2.2.3
129129
parameterized==0.9.0
130130
pg8000==1.31.4
131-
pip==25.1.1
131+
pip==25.2
132132
pluggy==1.6.0
133133
propcache==0.3.2
134134
proto-plus==1.26.1
@@ -144,8 +144,7 @@ pydantic_core==2.33.2
144144
pydot==1.4.2
145145
PyHamcrest==2.1.0
146146
PyJWT==2.10.1
147-
pymilvus==2.5.14
148-
pymongo==4.13.2
147+
pymongo==4.14.0
149148
PyMySQL==1.1.1
150149
pyparsing==3.2.3
151150
pyproject_hooks==1.2.0
@@ -159,10 +158,10 @@ pytz==2025.2
159158
PyYAML==6.0.2
160159
redis==5.3.1
161160
referencing==0.36.2
162-
regex==2024.11.6
161+
regex==2025.7.34
163162
requests==2.32.4
164163
requests-mock==1.12.1
165-
rpds-py==0.26.0
164+
rpds-py==0.27.0
166165
rsa==4.9.1
167166
scikit-learn==1.7.1
168167
scipy==1.15.3
@@ -174,7 +173,7 @@ six==1.17.0
174173
sniffio==1.3.1
175174
sortedcontainers==2.4.0
176175
soupsieve==2.7
177-
SQLAlchemy==2.0.41
176+
SQLAlchemy==2.0.42
178177
sqlalchemy_pytds==1.0.2
179178
sqlparse==0.5.3
180179
tenacity==8.5.0
@@ -185,7 +184,6 @@ tqdm==4.67.1
185184
typing-inspection==0.4.1
186185
typing_extensions==4.14.1
187186
tzdata==2025.2
188-
ujson==5.10.0
189187
uritemplate==4.2.0
190188
urllib3==2.5.0
191189
virtualenv-clone==0.5.7

0 commit comments

Comments
 (0)