@@ -158,18 +158,19 @@ class DataConverter:
158158 Supports:
159159 - Scalars (broadcast to all dimensions)
160160 - 1D data (np.ndarray, pd.Series, single-column DataFrame)
161+ - Multi-dimensional arrays
161162 - xr.DataArray (validated and potentially broadcast)
162163
163164 Simple 1D data is matched to one dimension and broadcast to others.
164165 DataArrays can have any number of dimensions.
165166 """
166167
167168 @staticmethod
168- def _convert_series_by_index (
169+ def _match_series_to_dimension (
169170 data : pd .Series , coords : Dict [str , pd .Index ], target_dims : Tuple [str , ...]
170171 ) -> xr .DataArray :
171172 """
172- Convert pandas Series to DataArray by matching index to coordinates.
173+ Match pandas Series to a dimension by comparing its index to coordinates.
173174
174175 Args:
175176 data: pandas Series
@@ -196,11 +197,11 @@ def _convert_series_by_index(
196197 raise ConversionError (f'Series index does not match any target dimension coordinates: { target_dims } ' )
197198
198199 @staticmethod
199- def _convert_1d_array_by_length (
200+ def _match_array_to_dimension (
200201 data : np .ndarray , coords : Dict [str , pd .Index ], target_dims : Tuple [str , ...]
201202 ) -> xr .DataArray :
202203 """
203- Convert 1D numpy array to DataArray by matching length to coordinates .
204+ Match 1D numpy array to a dimension by comparing its length to coordinate lengths .
204205
205206 Args:
206207 data: 1D numpy array
@@ -218,7 +219,7 @@ def _convert_1d_array_by_length(
218219 raise ConversionError ('Cannot convert multi-element array without target dimensions' )
219220 return xr .DataArray (data [0 ])
220221
221- # Match by length
222+ # Find dimensions with matching lengths
222223 matching_dims = []
223224 for dim_name in target_dims :
224225 if len (data ) == len (coords [dim_name ]):
@@ -239,110 +240,19 @@ def _convert_1d_array_by_length(
239240 return xr .DataArray (data .copy (), coords = {match_dim : coords [match_dim ]}, dims = [match_dim ])
240241
241242 @staticmethod
def _broadcast_to_target_dims(
    data: xr.DataArray, coords: Dict[str, pd.Index], target_dims: Tuple[str, ...]
) -> xr.DataArray:
    """
    Bring a DataArray into the layout described by the target dimensions.

    Args:
        data: DataArray to convert
        coords: Target coordinates, keyed by dimension name
        target_dims: Names of the target dimensions, in the desired order

    Returns:
        DataArray laid out along target_dims (0-dimensional if target_dims is empty)

    Raises:
        ConversionError: If data holds several elements but there is no target
            dimension, if coordinates differ from the target coordinates, or if
            the dimensions of data cannot be mapped onto the target dimensions
    """
    source_dims = data.dims
    n_source = len(source_dims)
    n_target = len(target_dims)

    # Without target dimensions only a single value can be returned.
    if n_target == 0:
        if data.size == 1:
            return xr.DataArray(data.values.item())
        raise ConversionError('Cannot convert multi-element DataArray to scalar')

    # Same dimensions as the target (order may differ): check coordinates, then reorder.
    if n_source == n_target and set(source_dims) == set(target_dims):
        for name in source_dims:
            if name not in coords:
                continue
            if not np.array_equal(data.coords[name].values, coords[name].values):
                raise ConversionError(f'DataArray {name} coordinates do not match target coordinates')
        aligned = data if source_dims == target_dims else data.transpose(*target_dims)
        return aligned.copy()

    # A 0-dimensional value is repeated over every target dimension.
    if data.ndim == 0:
        return xr.DataArray(data.item(), coords=coords, dims=target_dims)

    if n_source < n_target:
        return DataConverter._expand_to_more_dims(data, coords, target_dims)
    if n_source > n_target:
        raise ConversionError(f'Cannot reduce DataArray from {n_source} to {n_target} dimensions')

    # Same number of dimensions, but the names do not line up.
    raise ConversionError(f'Cannot convert DataArray with dims {source_dims} to target dims {target_dims}')
287-
288- @staticmethod
def _expand_to_more_dims(
    data: xr.DataArray, coords: Dict[str, pd.Index], target_dims: Tuple[str, ...]
) -> xr.DataArray:
    """
    Add the target dimensions that data lacks, repeating its values along them.

    Args:
        data: Source DataArray; each of its dimensions must be a target dimension
        coords: Target coordinates, keyed by dimension name
        target_dims: Names of the target dimensions, in the desired order

    Returns:
        New DataArray over target_dims, built from a copy of the broadcast values

    Raises:
        ConversionError: If a source dimension is not a target dimension or its
            coordinates differ from the target coordinates
    """
    # Every source dimension has to be a target dimension with identical coordinates.
    for name in data.dims:
        if name not in target_dims:
            raise ConversionError(f'Source dimension "{name}" not found in target dimensions {target_dims}')
        if not np.array_equal(data.coords[name].values, coords[name].values):
            raise ConversionError(f'Source {name} coordinates do not match target coordinates')

    # Missing dimensions are appended after the existing ones, in target order.
    axis_order = list(data.dims)
    for name in target_dims:
        if name not in axis_order:
            axis_order.append(name)
    appended = axis_order[data.ndim:]

    values = data.values
    if appended:
        # Give each new dimension a trailing length-1 axis, then stretch them all at once.
        values = values[(Ellipsis,) + (np.newaxis,) * len(appended)]
        full_shape = data.shape + tuple(len(coords[name]) for name in appended)
        values = np.broadcast_to(values, full_shape)

    # Move the axes into the order requested by the target.
    if tuple(axis_order) != target_dims:
        values = np.transpose(values, [axis_order.index(name) for name in target_dims])

    # Copy so the result owns its data instead of referencing a broadcast view.
    return xr.DataArray(values.copy(), coords={name: coords[name] for name in target_dims}, dims=target_dims)
330-
331- @staticmethod
332- def _convert_multid_array_by_shape (
243+ def _match_multidim_array_to_dimensions (
333244 data : np .ndarray , coords : Dict [str , pd .Index ], target_dims : Tuple [str , ...]
334245 ) -> xr .DataArray :
335246 """
336- Convert multi-dimensional numpy array to DataArray by matching dimensions by shape.
337- Returns a DataArray that may need further broadcasting to target dimensions.
247+ Match multi-dimensional numpy array to dimensions by finding the correct shape permutation.
338248
339249 Args:
340250 data: Multi-dimensional numpy array
341251 coords: Available coordinates
342252 target_dims: Target dimension names
343253
344254 Returns:
345- DataArray with dimensions matched by shape (may be subset of target_dims)
255+ DataArray with dimensions matched by shape
346256
347257 Raises:
348258 ConversionError: If array dimensions cannot be uniquely matched to coordinates
@@ -352,17 +262,14 @@ def _convert_multid_array_by_shape(
352262 raise ConversionError ('Cannot convert multi-element array without target dimensions' )
353263 return xr .DataArray (data .item ())
354264
355- # Get lengths of each dimension
265+ from itertools import permutations
266+
356267 array_shape = data .shape
357268 coord_lengths = {dim : len (coords [dim ]) for dim in target_dims }
358269
359- # Find all possible ways to match array dimensions to available coordinates
360- from itertools import permutations
361-
362- # Try all permutations of target_dims that match the array's number of dimensions
270+ # Find all possible dimension mappings
363271 possible_mappings = []
364272 for dim_subset in permutations (target_dims , data .ndim ):
365- # Check if this permutation matches the array shape
366273 if all (array_shape [i ] == coord_lengths [dim_subset [i ]] for i in range (len (dim_subset ))):
367274 possible_mappings .append (dim_subset )
368275
@@ -376,58 +283,80 @@ def _convert_multid_array_by_shape(
376283 'Cannot uniquely determine dimension mapping.'
377284 )
378285
379- # Use the unique mapping found
380286 matched_dims = possible_mappings [0 ]
381287 matched_coords = {dim : coords [dim ] for dim in matched_dims }
382288
383- # Return DataArray with matched dimensions - broadcasting will happen later if needed
384289 return xr .DataArray (data .copy (), coords = matched_coords , dims = matched_dims )
385290
291+ @staticmethod
def _broadcast_to_target(
    data: xr.DataArray, coords: Dict[str, pd.Index], target_dims: Tuple[str, ...]
) -> xr.DataArray:
    """
    Broadcast DataArray to target dimensions with validation.

    Every dimension of ``data`` must be one of ``target_dims`` and carry exactly the
    target coordinate values. Target dimensions that ``data`` lacks are added by
    broadcasting, so a 0-dimensional ``data`` is expanded to all target dimensions.

    Args:
        data: Source DataArray
        coords: Target coordinates, keyed by dimension name
        target_dims: Target dimension names, in the desired order

    Returns:
        New DataArray with dimensions ordered as target_dims. Its data is copied,
        so the result is writable rather than a read-only broadcast view.

    Raises:
        ConversionError: If data has more dimensions than the target, uses a
            dimension that is not a target dimension, has a dimension without
            coordinates, or has coordinates that differ from the target coordinates
    """
    # Cannot reduce dimensions of data
    if len(data.dims) > len(target_dims):
        raise ConversionError(f'Cannot reduce DataArray from {len(data.dims)} to {len(target_dims)} dimensions')

    # Validate coordinate compatibility
    for dim in data.dims:
        if dim not in target_dims:
            raise ConversionError(f'Source dimension "{dim}" not found in target dimensions {target_dims}')

        # A dimension without an index has no entry in data.coords; report it as a
        # conversion problem instead of leaking a KeyError from the lookup below.
        if dim not in data.coords:
            raise ConversionError(f'DataArray {dim} has no coordinates to compare with target coordinates')

        if not np.array_equal(data.coords[dim].values, coords[dim].values):
            raise ConversionError(f'DataArray {dim} coordinates do not match target coordinates')

    # Template that only carries the target shape and coordinates; its values are never read.
    target_template = xr.DataArray(
        np.empty([len(coords[dim]) for dim in target_dims]), coords=coords, dims=target_dims
    )

    # Added dimensions are produced by numpy-style broadcasting, which yields a
    # read-only view; copy so callers receive independent, writable data.
    return data.broadcast_like(target_template).transpose(*target_dims).copy()
318+
386319 @classmethod
387320 def to_dataarray (
388321 cls ,
389- data : Union [Scalar , np .ndarray , pd .Series , pd .DataFrame , xr .DataArray , TimeSeriesData ],
322+ data : Union [float , int , np .ndarray , pd .Series , pd .DataFrame , xr .DataArray ],
390323 coords : Optional [Dict [str , pd .Index ]] = None ,
391324 ) -> xr .DataArray :
392325 """
393- Convert data to xarray.DataArray with specified coordinates.
394-
395- Accepts:
396- - Scalars (broadcast to all dimensions)
397- - 1D arrays or Series (matched to one dimension, broadcast to others)
398- - Multi-D arrays or DataFrames (dimensions matched by length, broadcast to remaining)
399- - xr.DataArray (validated and potentially broadcast to additional dimensions)
326+ Convert various data types to xarray.DataArray with specified coordinates.
400327
401328 Args:
402- data: Data to convert
329+ data: Data to convert (scalar, array, Series, DataFrame, or DataArray)
403330 coords: Dictionary mapping dimension names to coordinate indices
404331
405332 Returns:
406- DataArray with the converted data
333+ DataArray with the converted data broadcast to target dimensions
334+
335+ Raises:
336+ ConversionError: If data cannot be converted or dimensions are ambiguous
407337 """
408338 if coords is None :
409339 coords = {}
410340
411- validated_coords , target_dims = cls ._validate_and_prepare_coords (coords )
341+ validated_coords , target_dims = cls ._prepare_coordinates (coords )
412342
413- # Step 1: Convert to DataArray (may have fewer dimensions than target)
343+ # Step 1: Convert input data to initial DataArray
414344 if isinstance (data , (int , float , np .integer , np .floating )):
415- # Scalars: create 0D DataArray, will be broadcast later
345+ # Scalar values
416346 intermediate = xr .DataArray (data .item () if hasattr (data , 'item' ) else data )
417347
418348 elif isinstance (data , np .ndarray ):
419349 if data .ndim == 1 :
420- intermediate = cls ._convert_1d_array_by_length (data , validated_coords , target_dims )
350+ intermediate = cls ._match_array_to_dimension (data , validated_coords , target_dims )
421351 else :
422- # Handle multi-dimensional arrays - this now allows partial matching
423- intermediate = cls ._convert_multid_array_by_shape (data , validated_coords , target_dims )
352+ intermediate = cls ._match_multidim_array_to_dimensions (data , validated_coords , target_dims )
424353
425354 elif isinstance (data , pd .Series ):
426355 if isinstance (data .index , pd .MultiIndex ):
427356 raise ConversionError (
428357 'Series index must be a single level Index. Multi-index Series are not supported.'
429358 )
430- intermediate = cls ._convert_series_by_index (data , validated_coords , target_dims )
359+ intermediate = cls ._match_series_to_dimension (data , validated_coords , target_dims )
431360
432361 elif isinstance (data , pd .DataFrame ):
433362 if isinstance (data .index , pd .MultiIndex ):
@@ -438,44 +367,40 @@ def to_dataarray(
438367 raise ConversionError ('DataFrame must have at least one column.' )
439368
440369 if len (data .columns ) == 1 :
441- intermediate = cls ._convert_series_by_index (
442- data .iloc [:, 0 ], validated_coords , target_dims
443- )
370+ # Single-column DataFrame - treat as Series
371+ intermediate = cls ._match_series_to_dimension (data .iloc [:, 0 ], validated_coords , target_dims )
444372 else :
445- # Handle multi-column DataFrames - this now allows partial matching
446- logger .warning ('Converting multi-column DataFrame to xr.DataArray. We advise to do this manually.' )
447- intermediate = cls ._convert_multid_array_by_shape (
448- data .to_numpy (), validated_coords , target_dims
449- )
373+ # Multi-column DataFrame - treat as multi-dimensional array
374+ intermediate = cls ._match_multidim_array_to_dimensions (data .to_numpy (), validated_coords , target_dims )
450375
451376 elif isinstance (data , xr .DataArray ):
452377 intermediate = data .copy ()
453378
454379 else :
455- raise ConversionError (
456- f'Unsupported data type: { type (data ).__name__ } . Only scalars, arrays, Series, DataFrames, and DataArrays are supported.'
457- )
380+ raise ConversionError (f'Unsupported data type: { type (data ).__name__ } .' )
458381
459- # Step 2: Broadcast to target dimensions if needed
460- # This now handles cases where intermediate has some but not all target dimensions
461- return cls ._broadcast_to_target_dims (intermediate , validated_coords , target_dims )
382+ # Step 2: Broadcast to target dimensions
383+ return cls ._broadcast_to_target (intermediate , validated_coords , target_dims )
462384
463385 @staticmethod
464- def _validate_and_prepare_coords (coords : Dict [str , pd .Index ]) -> Tuple [Dict [str , pd .Index ], Tuple [str , ...]]:
386+ def _prepare_coordinates (coords : Dict [str , pd .Index ]) -> Tuple [Dict [str , pd .Index ], Tuple [str , ...]]:
465387 """
466- Validate and prepare coordinates for the DataArray.
388+ Validate coordinates and prepare them for DataArray creation .
467389
468390 Args:
469391 coords: Dictionary mapping dimension names to coordinate indices
470392
471393 Returns:
472394 Tuple of (validated coordinates dict, dimensions tuple)
395+
396+ Raises:
397+ ConversionError: If coordinates are invalid
473398 """
474399 validated_coords = {}
475400 dims = []
476401
477402 for dim_name , coord_index in coords .items ():
478- # Validate coordinate index
403+ # Basic validation
479404 if not isinstance (coord_index , pd .Index ) or len (coord_index ) == 0 :
480405 raise ConversionError (f'{ dim_name } coordinates must be a non-empty pandas Index' )
481406
0 commit comments