Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Fixed
- A bug where `UVData.scan_number_array` was not properly updated when objects
were added or concatenated. The fix for was a major refactoring that should prevent
similar bugs in the future by using UVParameter forms to detect which attributes
to update on UVData objects.

## [3.2.6] - 2025-03-13

### Added
Expand Down
39 changes: 39 additions & 0 deletions src/pyuvdata/utils/frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from . import tools
from .pol import jstr2num, polstr2num
from .types import FloatArray, IntArray


def _check_flex_spw_contiguous(*, spw_array, flex_spw_id_array, strict=True):
Expand Down Expand Up @@ -549,3 +550,41 @@ def _select_freq_helper(
freq_inds = freq_inds.tolist()

return freq_inds, spw_inds, selections


def _add_freq_order(spw_id: IntArray, freq_arr: FloatArray) -> IntArray:
"""
Get the sorting order for the frequency axis after an add.

Sort first by spw then by channel, but don't reorder channels if they are
changing monotonically (all ascending or descending) within the spw.

Parameters
----------
spw_id : np.ndarray of int
SPW id array of combined data to be sorted.
freq_arr : np.ndarray of float
Frequency array of combined data to be sorted.

Returns
-------
f_order : np.ndarray of int
index array giving the sort order.

"""
spws = np.unique(spw_id)
f_order = np.concatenate([np.where(spw_id == spw)[0] for spw in np.unique(spw_id)])

# With spectral windows sorted, check and see if channels within
# windows need sorting. If they are ordered in ascending or descending
# fashion, leave them be. If not, sort in ascending order
for spw in spws:
select_mask = spw_id[f_order] == spw
check_freqs = freq_arr[f_order[select_mask]]
if not np.all(np.diff(check_freqs) > 0) and not np.all(
np.diff(check_freqs) < 0
):
subsort_order = f_order[select_mask]
f_order[select_mask] = subsort_order[np.argsort(check_freqs)]

return f_order
54 changes: 54 additions & 0 deletions src/pyuvdata/utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import numpy as np

from .types import FloatArray, IntArray, StrArray


def _get_iterable(x):
"""Return iterable version of input."""
Expand Down Expand Up @@ -687,3 +689,55 @@ def _ntimes_to_nblts(uvd):
inds.append(np.where(unique_t == i)[0][0])

return np.asarray(inds)


def float_int_to_str_array(
*,
fltarr: FloatArray,
intarr: IntArray,
flt_tol: tuple[float, float],
flt_first: bool = True,
) -> StrArray:
"""
Create a string array built from float and integer arrays for matching.
Comment thread
bhazelton marked this conversation as resolved.

Parameters
----------
fltarr : np.ndarray of float
float array to be used in output string array
intarr : np.ndarray of int
integer array to be used in output string array
flt_tol : 2-tuple of float
Absolute tolerance to use in formatting the floats as strings. Note that
this is converted to a decimal place for print formatting, so the precision
might be slightly higher.
flt_first : bool
Whether to put the float first in the out put string or not (if False
the int comes first.)

Returns
-------
np.ndarray of str
String array that combines the float and integer values, useful for matching.

Examples
--------
>>> float_int_to_str_array(fltarr=[np.pi, np.pi/2], intarr=[1, 2], flt_tol=.01)
array(['3.14_00000001', '1.57_00000002'], dtype='<U13')

>>> float_int_to_str_array(
... fltarr=[np.pi, np.pi/2], intarr=[1, 2], flt_tol=.001, flt_first=False
... )
array(['00000001_3.142', '00000002_1.571'], dtype='<U14')

"""
prec_flt = -1 * np.floor(np.log10(flt_tol)).astype(int)
prec_int = 8
flt_str_list = ["{1:.{0}f}".format(prec_flt, flt) for flt in fltarr]
int_str_list = [str(intv).zfill(prec_int) for intv in intarr]
list_of_lists = []
if flt_first:
list_of_lists = [flt_str_list, int_str_list]
else:
list_of_lists = [int_str_list, flt_str_list]
return np.array(["_".join(zpval) for zpval in zip(*list_of_lists, strict=True)])
215 changes: 215 additions & 0 deletions src/pyuvdata/uvbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from . import __version__, parameter as uvp
from .utils.tools import _get_iterable, slicify
from .utils.types import IntArray

__all__ = ["UVBase"]

Expand Down Expand Up @@ -754,6 +755,87 @@ def copy(self):
"""
return copy.deepcopy(self)

def _get_uvparam_axis(self, axis_name: str, single_named_axis: bool = False):
"""
Get a mapping of properties that have a given axis to the axis number.

This uses the forms of the UVParameter attributes on this object to identify
properties derived from UVParameters that have one or more axes associated
with the axis_name. Any properties with an associated axis appear as keys
in the output dict, with the values giving the associated axis numbers.

Parameters
----------
axis_name : str
A named parameter within the object (e.g., "Nblts", "Ntimes", "Nants").
single_named_axis : bool
Option to only include parameters with a single named axis.

Returns
-------
dict
The keys are UVParameter names that have an axis with axis_name
(axis_name appears in their form). The values are an array of the axis
indices where axis_name appears in their form.

Examples
--------
>>> from pyuvdata import UVData
>>> from pyuvdata.datasets import fetch_data
>>> filename = fetch_data("vla_casa_tutorial_uvfits")
>>> uvd = UVData.from_file(filename)
>>> uvd._get_uvparam_axis("Nfreqs")
{'channel_width': array([0]),
'data_array': array([1]),
'flag_array': array([1]),
'flex_spw_id_array': array([0]),
'freq_array': array([0]),
'nsample_array': array([1])}

>>> uvd._get_uvparam_axis("Nfreqs", single_named_axis=True)
{'channel_width': array([0]),
'flex_spw_id_array': array([0]),
'freq_array': array([0])}

"""
ret_dict = {}
for param in self:
# For each attribute, if the value is None, then bail, otherwise
# find the axis number(s) with the named shape.

attr = getattr(self, param)
# Only look at where form is a tuple, since that's the only case we
# can have a dynamically defined shape.
if (
attr.value is not None
and isinstance(attr.form, tuple)
and axis_name in attr.form
):
if (
single_named_axis
and sum([isinstance(entry, str) for entry in attr.form]) > 1
):
continue

# Handle a repeated param_name in the form.
ret_dict[attr.name] = np.nonzero(np.asarray(attr.form) == axis_name)[0]
return ret_dict

def _get_multi_axis_params(self) -> list[str]:
"""Get a list of all multidimensional parameters."""
ret_list = []
for param in self:
# For each attribute, if the value is None, then bail, otherwise
# check if it's multidimensional
attr = getattr(self, param)
if (
attr.value is not None
and isinstance(attr.form, tuple)
and sum([isinstance(entry, str) for entry in attr.form]) > 1
):
ret_list.append(attr.name)
return ret_list

def _select_along_param_axis(self, param_dict: dict):
"""
Downselect values along a parameterized axis.
Expand Down Expand Up @@ -808,3 +890,136 @@ def _select_along_param_axis(self, param_dict: dict):
# here in the case of a repeated param_name in the form.
attr.value = attr.get_from_form(slice_dict)
attr.setter(self)

def _axis_add_helper(
self,
other,
axis_name: str,
other_inds: IntArray | None = None,
final_order: IntArray | None = None,
):
"""
Combine UVParameter objects with a single axis along an axis.

Parameters
----------
other : UVBase
The UVBase object to be added.
axis_name : str
The axis name (e.g. "Nblts", "Npols").
other_inds : np.ndarray of int, optional
Indices into the other object along this axis to include. If None,
include all indices.
final_order : np.ndarray of int, optional
Final ordering array giving the sort order after concatenation.

"""
update_params = self._get_uvparam_axis(axis_name, single_named_axis=True)
other_form_dict = {axis_name: other_inds}
for param, axis_list in update_params.items():
axis = axis_list[0]
new_array = np.concatenate(
[
getattr(self, param),
getattr(other, "_" + param).get_from_form(other_form_dict),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we couldn't just use np.take(getattr(other, param), other_inds, axis) here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe. I didn't write get_from_form, it looks like it tries to do a slicing if possible and if it can't it just uses np.take, so I think it's already somewhat optimized? But probably worth doing some performance testing on.

],
axis=axis,
)
if final_order is not None:
new_array = np.take(new_array, final_order, axis=axis)

setattr(self, param, new_array)

def _axis_pad_helper(self, axis_name: str, add_len: int):
"""
Pad out UVParameter objects with multiple dimensions along an axis.

Parameters
----------
axis_name : str
The axis name (e.g. "Nblts", "Npols").
add_len : int
The extra length to be padded on for this axis.

"""
update_params = self._get_uvparam_axis(axis_name)
multi_axis_params = self._get_multi_axis_params()
for param, axis_list in update_params.items():
if param not in multi_axis_params:
continue
Comment on lines +948 to +949
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we not also padding single-axis objects? This is confusing

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is confusing! Among other things, this is handling the case where we have data divided into chunks along more than one axis and we're trying to combine it. If you combine first along one axis and then start adding in the first chunk along the next axis you have to pad out the multi-dimensional arrays with zeros (and flags) for the corners you don't have actual information yet. That doesn't come up for single axis parameters.

Imagine a 2D array split into quadrants. The first two (along any axis) add fine. When you add in the third, you need to pad the fourth quadrant with zeros (and flags). Then when we add in the fourth, if all the data are zero and flagged we allow it to overwrite that quadrant. Does that make sense? Happy to add some more comments or docstrings if you think it'd be helpful.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah this makes sense. Super duper confusing haha

this_param_shape = getattr(self, param).shape
this_param_type = getattr(self, "_" + param).expected_type
bool_type = this_param_type is bool or bool in this_param_type
pad_shape = list(this_param_shape)
for ax in axis_list:
pad_shape[ax] = add_len
if bool_type:
pad_array = np.ones(tuple(pad_shape), dtype=bool)
else:
pad_array = np.zeros(tuple(pad_shape))
new_array = np.concatenate([getattr(self, param), pad_array], axis=ax)
if bool_type:
new_array = new_array.astype(np.bool_)
setattr(self, param, new_array)

def _fill_multi_helper(self, other, t2o_dict: dict, order_dict: dict):
"""
Fill UVParameter objects with multiple dimensions from the right side object.

Parameters
----------
other : UVBase
The UVBase object to be added.
t2o_dict : dict
dict giving the indices in the left object to be filled from the right
object for each axis (keys are axes, values are index arrays).
order_dict : dict
dict giving the final sort indices for each axis (keys are axes, values
are index arrays for sorting).
Comment on lines +976 to +978
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this sorting occurs in multiple functions, might it be better to just have a standalone "arbitrary axis sorting helper"?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, possibly. I think the reason I did it this way is that each UVParmeter is only affected by one of these functions, so the sorting is currently only done only once per parameter. It's just done in a different function depending on whether it's a single axis or multiaxis array. For the single axis array, the sorting can be done at the same time as the indexing with the np.take call, so I think that's why I did it this way. But open to refactoring if it's not a performance hit.


"""
multi_axis_params = self._get_multi_axis_params()
for param in multi_axis_params:
form = getattr(self, "_" + param).form
index_list = []
for axis in form:
index_list.append(t2o_dict[axis])
new_arr = getattr(self, param)
new_arr[np.ix_(*index_list)] = getattr(other, param)
setattr(self, param, new_arr)

# Fix ordering
for axis_ind, axis in enumerate(form):
if order_dict[axis] is not None:
unique_order_diffs = np.unique(np.diff(order_dict[axis]))
if np.array_equal(unique_order_diffs, np.array([1])):
# everything is already in order
continue
setattr(
self,
param,
np.take(getattr(self, param), order_dict[axis], axis=axis_ind),
)

def _axis_fast_concat_helper(self, other, axis_name: str):
"""
Concatenate UVParameter objects along an axis assuming no overlap.

Parameters
----------
other : UVBase
The UVBase object to be added.
axis_name : str
The axis name (e.g. "Nblts", "Npols").
"""
update_params = self._get_uvparam_axis(axis_name)
for param, axis_list in update_params.items():
axis = axis_list[0]
setattr(
self,
param,
np.concatenate(
[getattr(self, param)] + [getattr(obj, param) for obj in other],
axis=axis,
),
)
Loading
Loading