@@ -1419,6 +1419,39 @@ def test_varlen_write_floats(self):
14191419 # can't use assert_array_equal w/ object array
14201420 self .assertTrue (all (np .array_equal (x , A [i ]) for i , x in enumerate (T_ )))
14211421
def test_varlen_write_homogeneous_subarrays(self):
    """Round-trip a dense var-length attribute whose cells share one length.

    When every sub-array has the same length, numpy coalesces them into a
    2D array rather than a 1D array of arrays, which previously caused
    errors on write. See https://github.com/TileDB-Inc/TileDB-Py/issues/494
    """
    uri = self.path("homogeneous_varlen")

    rows = [
        np.array([1, 2, 9], dtype=np.int64),
        np.array([3, 4, 5], dtype=np.int64),
        np.array([7, 8, 6], dtype=np.int64),
        np.array([10, 11, 12], dtype=np.int64),
    ]
    # All rows have length 3, so numpy will coalesce this into shape (4, 3).
    A = np.array(rows, dtype="O")

    schema = tiledb.ArraySchema(
        tiledb.Domain(tiledb.Dim(domain=(1, 4), tile=4)),
        (tiledb.Attr(name="val", dtype=np.int64, var=True),),
    )
    tiledb.DenseArray.create(uri, schema)

    with tiledb.DenseArray(uri, mode="w") as T:
        T[:] = {"val": A}

    # Build the expectation as a 1D object array holding one row per cell;
    # element-wise assignment keeps numpy from stacking the rows into 2D.
    expected = np.empty(len(rows), dtype=object)
    for idx, row in enumerate(rows):
        expected[idx] = row

    with tiledb.DenseArray(uri, mode="r") as T:
        res = T[:]["val"]
        assert_subarrays_equal(res, expected)
14221455 def test_varlen_write_floats_2d (self ):
14231456 A = np .array (
14241457 [np .random .rand (x ) for x in np .arange (1 , 10 )], dtype = object
@@ -2249,6 +2282,53 @@ def test_sparse_2d_varlen_int(self, fx_sparse_cell_order):
22492282 assert_unordered_equal (res ["__dim_0" ], c1 )
22502283 assert_unordered_equal (res ["__dim_1" ], c2 )
22512284
@pytest.mark.parametrize(
    "dtype,use_object_dtype",
    [
        (np.int64, True),
        (np.int64, False),
        (np.int32, True),
        (np.float32, True),
        (np.float64, False),
        (np.uint32, True),
    ],
)
def test_sparse_varlen_homogeneous_subarrays(
    self, fx_sparse_cell_order, dtype, use_object_dtype
):
    """Round-trip a sparse var-length attribute whose cells share one length.

    numpy coalesces equal-length sub-arrays into a 2D array, which
    previously caused a 'value length does not match coordinate length'
    error. The case is exercised both with an explicit object dtype and
    with numpy's inferred native dtype.
    See https://github.com/TileDB-Inc/TileDB-Py/issues/494
    """
    uri = self.path("test_sparse_varlen_homogeneous_subarrays")

    schema = tiledb.ArraySchema(
        tiledb.Domain(tiledb.Dim(domain=(0, 10), dtype=np.int64)),
        (tiledb.Attr(name="val", var=True, dtype=dtype),),
        sparse=True,
        cell_order=fx_sparse_cell_order,
    )
    tiledb.SparseArray.create(uri, schema)

    rows = [
        np.array([1, 2, 9], dtype=dtype),
        np.array([3, 4, 5], dtype=dtype),
    ]

    # With use_object_dtype the caller explicitly asks for dtype='O';
    # otherwise numpy picks the native dtype. Both yield a 2D array
    # because the rows have equal length.
    array_kwargs = {"dtype": "O"} if use_object_dtype else {}
    vals = np.array(rows, **array_kwargs)

    with tiledb.SparseArray(uri, "w") as A:
        A[[1, 2]] = {"val": vals}

    # Expected result: a 1D object array holding one row per written cell.
    expected = np.empty(len(rows), dtype=object)
    for idx, row in enumerate(rows):
        expected[idx] = row

    with tiledb.SparseArray(uri, "r") as A:
        res = A[:]
        assert_subarrays_equal(res["val"], expected)
22522332 def test_sparse_mixed_domain_uint_float64 (self , fx_sparse_cell_order ):
22532333 path = self .path ("mixed_domain_uint_float64" )
22542334 dims = [
0 commit comments