@@ -217,18 +217,16 @@ def test_run(self, test_documents):
217217 )
218218 def test_batch_processing (self , test_documents ):
219219 """Test that batch processing works"""
220- many_documents = test_documents * 50
221-
222220 embedder = WatsonxDocumentEmbedder (
223221 model = "ibm/slate-30m-english-rtrvr" ,
224222 api_key = Secret .from_env_var ("WATSONX_API_KEY" ),
225223 project_id = Secret .from_env_var ("WATSONX_PROJECT_ID" ),
226- batch_size = 50 ,
224+ batch_size = 2 ,
227225 truncate_input_tokens = 128 ,
228226 )
229227
230- result = embedder .run (many_documents )
231- assert len (result ["documents" ]) == 150
228+ result = embedder .run (test_documents )
229+ assert len (result ["documents" ]) == 3
232230 assert all (doc .embedding is not None for doc in result ["documents" ])
233231
234232 @pytest .mark .skipif (
@@ -237,51 +235,16 @@ def test_batch_processing(self, test_documents):
237235 )
238236 def test_text_truncation (self ):
239237 """Test that truncation works with long documents"""
240- long_content = "This is a very long document. " * 1000
238+ long_content = "This is a very long document. " * 10
241239 long_document = Document (content = long_content )
242240
243241 embedder = WatsonxDocumentEmbedder (
244242 model = "ibm/slate-30m-english-rtrvr" ,
245243 api_key = Secret .from_env_var ("WATSONX_API_KEY" ),
246244 project_id = Secret .from_env_var ("WATSONX_PROJECT_ID" ),
247- truncate_input_tokens = 128 ,
245+ truncate_input_tokens = 4 ,
248246 )
249247
250248 result = embedder .run ([long_document ])
251249 assert len (result ["documents" ][0 ].embedding ) > 0
252- assert result ["meta" ]["truncate_input_tokens" ] == 128
253-
254- @pytest .mark .skipif (
255- not os .environ .get ("WATSONX_API_KEY" ) or not os .environ .get ("WATSONX_PROJECT_ID" ),
256- reason = "WATSONX_API_KEY or WATSONX_PROJECT_ID not set" ,
257- )
258- def test_prefix_suffix (self , test_documents ):
259- """Test that prefix and suffix are correctly applied"""
260- embedder = WatsonxDocumentEmbedder (
261- model = "ibm/slate-30m-english-rtrvr" ,
262- api_key = Secret .from_env_var ("WATSONX_API_KEY" ),
263- project_id = Secret .from_env_var ("WATSONX_PROJECT_ID" ),
264- prefix = "PREFIX: " ,
265- suffix = " :SUFFIX" ,
266- truncate_input_tokens = 128 ,
267- )
268-
269- result = embedder .run ([test_documents [0 ]])
270- assert result ["documents" ][0 ].embedding is not None
271-
272- @pytest .mark .skipif (
273- not os .environ .get ("WATSONX_API_KEY" ) or not os .environ .get ("WATSONX_PROJECT_ID" ),
274- reason = "WATSONX_API_KEY or WATSONX_PROJECT_ID not set" ,
275- )
276- def test_concurrency_handling (self , test_documents ):
277- """Test that concurrency limits are respected"""
278- embedder = WatsonxDocumentEmbedder (
279- model = "ibm/slate-30m-english-rtrvr" ,
280- api_key = Secret .from_env_var ("WATSONX_API_KEY" ),
281- project_id = Secret .from_env_var ("WATSONX_PROJECT_ID" ),
282- concurrency_limit = 2 ,
283- batch_size = 1 ,
284- )
285-
286- result = embedder .run (test_documents )
287- assert len (result ["documents" ]) == 3
250+ assert result ["meta" ]["truncate_input_tokens" ] == 4
0 commit comments