@@ -125,6 +125,27 @@ def test_retrieve_samples(test_ds):
125125 assert test_ds .samples () == ["HG00280" , "HG01762" ]
126126
127127
def test_read_unsupported_regions_type(test_ds):
    """Check that every read entry point rejects invalid ``regions`` arguments.

    Two invalid inputs are exercised against ``read``, ``read_arrow`` and
    ``read_iter``:

    * a value of an unsupported type (a ``float``), and
    * a ``numpy.ndarray`` that is 2-dimensional instead of 1-dimensional.

    Each call is expected to raise with a message naming the offending type.
    """
    # Local import keeps this test self-contained; ``re`` is only needed here.
    import re

    unsupported_region = 3.14
    wrong_dimension_region = np.array([["1:12700-13400"], ["1:12700-13400"]])

    # ``pytest.raises(match=...)`` interprets its argument as a regular
    # expression (via ``re.search``).  The messages embed ``repr`` of a type,
    # e.g. ``<class 'numpy.ndarray'>``, so escape them to match literally
    # rather than letting ``.`` act as a wildcard.
    unsupported_type_error = re.escape(
        f'"regions" parameter cannot have type: {type(unsupported_region)}'
    )
    ndarray_wrong_dimension_error = re.escape(
        f'"regions" parameter of type {type(wrong_dimension_region)} '
        "must be 1-dimensional"
    )

    def _drain_read_iter(regions):
        # ``read_iter`` is consumed through iteration in the original test, so
        # iterate here too in case the error only surfaces on the first step.
        for _ in test_ds.read_iter(regions=regions):
            pass

    readers = (
        lambda regions: test_ds.read(regions=regions),
        lambda regions: test_ds.read_arrow(regions=regions),
        _drain_read_iter,
    )

    # NOTE(review): ``Exception`` is kept as the expected type because the
    # concrete exception class raised by the dataset is not visible here.
    for reader in readers:
        with pytest.raises(Exception, match=unsupported_type_error):
            reader(unsupported_region)
        with pytest.raises(Exception, match=ndarray_wrong_dimension_error):
            reader(wrong_dimension_region)
147+
148+
128149def test_read_attrs (test_ds_attrs ):
129150 attrs = ["sample_name" ]
130151 df = test_ds_attrs .read (attrs = attrs )
@@ -233,6 +254,40 @@ def test_basic_reads(test_ds):
233254 _check_dfs (
234255 expected_df , df .sort_values (ignore_index = True , by = ["sample_name" , "pos_start" ])
235256 )
257+ df = test_ds .read_arrow (
258+ attrs = ["sample_name" , "pos_start" , "pos_end" ], regions = ["1:12700-13400" ]
259+ ).to_pandas ()
260+ _check_dfs (
261+ expected_df , df .sort_values (ignore_index = True , by = ["sample_name" , "pos_start" ])
262+ )
263+
264+ # Regions as string
265+ df = test_ds .read (
266+ attrs = ["sample_name" , "pos_start" , "pos_end" ], regions = "1:12700-13400"
267+ )
268+ _check_dfs (
269+ expected_df , df .sort_values (ignore_index = True , by = ["sample_name" , "pos_start" ])
270+ )
271+ df = test_ds .read_arrow (
272+ attrs = ["sample_name" , "pos_start" , "pos_end" ], regions = "1:12700-13400"
273+ ).to_pandas ()
274+ _check_dfs (
275+ expected_df , df .sort_values (ignore_index = True , by = ["sample_name" , "pos_start" ])
276+ )
277+
278+ # Regions as numpy.ndarray
279+ df = test_ds .read (
280+ attrs = ["sample_name" , "pos_start" , "pos_end" ], regions = np .array (["1:12700-13400" ])
281+ )
282+ _check_dfs (
283+ expected_df , df .sort_values (ignore_index = True , by = ["sample_name" , "pos_start" ])
284+ )
285+ df = test_ds .read_arrow (
286+ attrs = ["sample_name" , "pos_start" , "pos_end" ], regions = np .array (["1:12700-13400" ])
287+ ).to_pandas ()
288+ _check_dfs (
289+ expected_df , df .sort_values (ignore_index = True , by = ["sample_name" , "pos_start" ])
290+ )
236291
237292 # Region and sample intersection
238293 df = test_ds .read (
@@ -382,41 +437,39 @@ def test_incomplete_read_generator():
382437 uri = os .path .join (TESTS_INPUT_DIR , "arrays/v3/ingested_2samples" )
383438 cfg = tiledbvcf .ReadConfig (memory_budget_mb = 0 )
384439 test_ds = tiledbvcf .Dataset (uri , mode = "r" , cfg = cfg )
385-
386- dfs = []
387- for df in test_ds .read_iter (attrs = ["pos_end" ], regions = ["1:12700-13400" ]):
388- dfs .append (df )
389- overall_df = pd .concat (dfs , ignore_index = True )
390-
391- assert len (overall_df ) == 6
392- _check_dfs (
393- pd .DataFrame .from_dict (
440+ expected_df = pd .DataFrame .from_dict (
394441 {
395442 "pos_end" : np .array (
396443 [12771 , 12771 , 13374 , 13389 , 13395 , 13413 ], dtype = np .int32
397444 )
398445 }
399- ),
400- overall_df ,
401- )
446+ )
447+
448+ # NOTE: Running multiple tests shows that the iterator can be reused
449+
450+ # Regions as string
451+ dfs = []
452+ for df in test_ds .read_iter (attrs = ["pos_end" ], regions = "1:12700-13400" ):
453+ dfs .append (df )
454+ overall_df = pd .concat (dfs , ignore_index = True )
455+ assert len (overall_df ) == 6
456+ _check_dfs (expected_df , overall_df )
402457
403- # Test that the iterator can be used again
458+ # Regions as list
404459 dfs = []
405460 for df in test_ds .read_iter (attrs = ["pos_end" ], regions = ["1:12700-13400" ]):
406461 dfs .append (df )
407462 overall_df = pd .concat (dfs , ignore_index = True )
463+ assert len (overall_df ) == 6
464+ _check_dfs (expected_df , overall_df )
408465
466+ # Regions as numpy.ndarray
467+ dfs = []
468+ for df in test_ds .read_iter (attrs = ["pos_end" ], regions = np .array (["1:12700-13400" ])):
469+ dfs .append (df )
470+ overall_df = pd .concat (dfs , ignore_index = True )
409471 assert len (overall_df ) == 6
410- _check_dfs (
411- pd .DataFrame .from_dict (
412- {
413- "pos_end" : np .array (
414- [12771 , 12771 , 13374 , 13389 , 13395 , 13413 ], dtype = np .int32
415- )
416- }
417- ),
418- overall_df ,
419- )
472+ _check_dfs (expected_df , overall_df )
420473
421474
422475def test_read_filters (test_ds ):
0 commit comments