Skip to content

Commit 26d2c26

Browse files
committed
Add stats to validate_xr_variable
1 parent 1a95f82 commit 26d2c26

File tree

4 files changed

+42
-54
lines changed

4 files changed

+42
-54
lines changed

tests/conftest.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,7 @@ def empty_mdio_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
7878
return tmp_path_factory.mktemp(r"empty_mdio_dir")
7979

8080

81-
#
8281
# Uncomment the function below for local debugging
83-
#
8482
# @pytest.fixture(scope="session")
8583
# def tmp_path_factory() -> pytest.TempPathFactory:
8684
# """Custom tmp_path_factory implementation for local debugging."""

tests/integration/test_create_empty.py

Lines changed: 22 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -92,57 +92,39 @@ def _validate_empty_mdio_dataset(cls, ds: xr_Dataset, has_headers: bool, is_velo
9292
assert ds.sizes == {"inline": 345, "crossline": 188, "time": 1501}
9393

9494
# Validate the dimension coordinate variables
95-
validate_xr_variable(ds, "inline", {"inline": 345}, UNITS_NONE, np.int32, range(1, 346), get_values)
96-
validate_xr_variable(ds, "crossline", {"crossline": 188}, UNITS_NONE, np.int32, range(1, 189), get_values)
97-
validate_xr_variable(ds, "time", {"time": 1501}, UNITS_SECOND, np.int32, range(0, 3002, 2), get_values)
95+
validate_xr_variable(ds, "inline", {"inline": 345}, UNITS_NONE, np.int32, False, range(1, 346), get_values)
96+
validate_xr_variable(
97+
ds, "crossline", {"crossline": 188}, UNITS_NONE, np.int32, False, range(1, 189), get_values
98+
)
99+
validate_xr_variable(ds, "time", {"time": 1501}, UNITS_SECOND, np.int32, False, range(0, 3002, 2), get_values)
98100

99101
# Validate the non-dimensional coordinate variables (should be empty for empty dataset)
100-
validate_xr_variable(ds, "cdp_x", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64, None, None)
101-
validate_xr_variable(ds, "cdp_y", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64, None, None)
102+
validate_xr_variable(ds, "cdp_x", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64)
103+
validate_xr_variable(ds, "cdp_y", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64)
102104

103105
if has_headers:
104106
# Validate the headers (should be empty for empty dataset)
105107
# Infer the dtype from segy_spec and ignore endianness
106108
header_dtype = get_teapot_segy_spec().trace.header.dtype.newbyteorder("native")
107-
validate_xr_variable(ds, "headers", {"inline": 345, "crossline": 188}, UNITS_NONE, header_dtype, None, None)
108-
validate_xr_variable(
109-
ds,
110-
"segy_file_header",
111-
dims={},
112-
units=UNITS_NONE,
113-
data_type=np.dtype("U1"),
114-
expected_values=None,
115-
actual_value_generator=None,
116-
)
109+
validate_xr_variable(ds, "headers", {"inline": 345, "crossline": 188}, UNITS_NONE, header_dtype)
110+
validate_xr_variable(ds, "segy_file_header", dims={}, units=UNITS_NONE, data_type=np.dtype("U1"))
117111
else:
118112
assert "headers" not in ds.variables
119113
assert "segy_file_header" not in ds.variables
120114

121115
# Validate the trace mask (should be all True for empty dataset)
122-
validate_xr_variable(ds, "trace_mask", {"inline": 345, "crossline": 188}, UNITS_NONE, np.bool_, None, None)
116+
validate_xr_variable(ds, "trace_mask", {"inline": 345, "crossline": 188}, UNITS_NONE, np.bool_)
123117
trace_mask = ds["trace_mask"].values
124118
assert not np.any(trace_mask), "All traces should be marked as dead in empty dataset"
125119

126120
# Validate the velocity or amplitude data (should be empty)
127121
if is_velocity:
128122
validate_xr_variable(
129-
ds,
130-
"velocity",
131-
{"inline": 345, "crossline": 188, "time": 1501},
132-
UNITS_METER_PER_SECOND,
133-
np.float32,
134-
None,
135-
None,
123+
ds, "velocity", {"inline": 345, "crossline": 188, "time": 1501}, UNITS_METER_PER_SECOND, np.float32
136124
)
137125
else:
138126
validate_xr_variable(
139-
ds,
140-
"amplitude",
141-
{"inline": 345, "crossline": 188, "time": 1501},
142-
UNITS_NONE,
143-
np.float32,
144-
None,
145-
None,
127+
ds, "amplitude", {"inline": 345, "crossline": 188, "time": 1501}, UNITS_NONE, np.float32
146128
)
147129

148130
@classmethod
@@ -273,17 +255,19 @@ def test_overwrite_behavior(self, empty_mdio_dir: Path) -> None:
273255
def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
274256
"""Test showing how to populate empty dataset."""
275257
# Open an empty PostStack3DVelocityTime dataset with SEG-Y 1.0 headers
276-
# NOTES:
258+
#
277259
# When this empty dataset was created from the 'PostStack3DVelocityTime' template and dimensions,
278260
# * 'inline', 'crossline', and 'time' dimension coordinate variables were created and pre-populated
261+
# NOTE: the 'time' units are specified in the template, so they are not None in this case.
279262
# * 'cdp_x', 'cdp_y' non-dimensional coordinate variables were created
280-
# * 'amplitude' variable was created (the name of this variable is specified in the template)
281-
# HACK: in this example, we will use this variable to store the velocity data
263+
# NOTE: the 'cdp_x' and 'cdp_y' units are specified in the template, so they are not None in this case.
264+
# * 'velocity' variable was created (the name of this default variable is specified in the template)
265+
# NOTE: the 'velocity' units are specified in the template, so they are not None in this case.
282266
# * 'trace_mask' variable was created and pre-populated with 'False' fill values
283267
# (all traces are marked as dead)
284268
# * 'headers' and 'segy_file_header' variables were created (if the dataset was created with
285269
# headers not None). The 'headers' variable structured datatype is defined by the HeaderSpec
286-
# that was used to create the empty MDIO
270+
# that was used to create the empty MDIO
287271
# * dataset attribute called 'attributes' was created
288272
ds = open_mdio(mdio_with_headers)
289273

@@ -320,7 +304,9 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
320304
# 4) Populate dataset's trace mask (optional)
321305
ds.trace_mask[:] = ~np.isnan(velocity[:, :, 0])
322306

323-
# 5) Set coordinate and data variable units (optional)
307+
# 5) If the units were not set in the template, or you want to change the coordinate and data variable units,
308+
# you can set the unitsV1 attribute for the coordinate and data variables (optional).
309+
# If you are happy with the units specified in the template, you should skip this step.
324310
ds.time.attrs["unitsV1"] = TimeUnitModel(time=TimeUnitEnum.MILLISECOND).model_dump(mode="json")
325311

326312
ds.cdp_x.attrs["unitsV1"] = LengthUnitModel(length=LengthUnitEnum.FOOT).model_dump(mode="json")
@@ -376,7 +362,7 @@ def test_populate_empty_dataset(self, mdio_with_headers: Path) -> None:
376362
output_path_mdio = mdio_with_headers.parent / "populated_empty.mdio"
377363
to_mdio(ds, output_path=output_path_mdio, mode="w", compute=True)
378364

379-
# 9) Convert the populated emptyMDIO to SEG-Y
365+
# 9) Convert the populated empty MDIO to SEG-Y
380366
if "headers" in ds.variables:
381367
# Select the SEG-Y standard to use for the conversion
382368
custom_segy_spec = get_segy_standard(1.0)

tests/integration/test_segy_roundtrip_teapot.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,15 @@ def test_grid(self, teapot_mdio_tmp: Path, teapot_segy_spec: SegySpec) -> None:
243243
ds = open_mdio(teapot_mdio_tmp)
244244

245245
# Validate the dimension coordinate variables
246-
validate_xr_variable(ds, "inline", {"inline": 345}, UNITS_NONE, np.int32, range(1, 346), get_values)
247-
validate_xr_variable(ds, "crossline", {"crossline": 188}, UNITS_NONE, np.int32, range(1, 189), get_values)
248-
validate_xr_variable(ds, "time", {"time": 1501}, UNITS_SECOND, np.int32, range(0, 3002, 2), get_values)
246+
validate_xr_variable(ds, "inline", {"inline": 345}, UNITS_NONE, np.int32, False, range(1, 346), get_values)
247+
validate_xr_variable(
248+
ds, "crossline", {"crossline": 188}, UNITS_NONE, np.int32, False, range(1, 189), get_values
249+
)
250+
validate_xr_variable(ds, "time", {"time": 1501}, UNITS_SECOND, np.int32, False, range(0, 3002, 2), get_values)
249251

250252
# Validate the non-dimensional coordinate variables
251-
validate_xr_variable(ds, "cdp_x", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64, None, None)
252-
validate_xr_variable(ds, "cdp_y", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64, None, None)
253+
validate_xr_variable(ds, "cdp_x", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64)
254+
validate_xr_variable(ds, "cdp_y", {"inline": 345, "crossline": 188}, UNITS_METER, np.float64)
253255

254256
# Validate the headers
255257
# We have a custom set of headers since we used customize_segy_specs()
@@ -262,22 +264,17 @@ def test_grid(self, teapot_mdio_tmp: Path, teapot_segy_spec: SegySpec) -> None:
262264
{"inline": 345, "crossline": 188},
263265
UNITS_NONE,
264266
data_type.newbyteorder("native"), # mdio saves with machine endian, spec could be different endian
267+
False,
265268
range(1, 346),
266269
get_inline_header_values,
267270
)
268271

269272
# Validate the trace mask
270-
validate_xr_variable(ds, "trace_mask", {"inline": 345, "crossline": 188}, UNITS_NONE, np.bool, None, None)
273+
validate_xr_variable(ds, "trace_mask", {"inline": 345, "crossline": 188}, UNITS_NONE, np.bool)
271274

272275
# validate the amplitude data
273276
validate_xr_variable(
274-
ds,
275-
"amplitude",
276-
{"inline": 345, "crossline": 188, "time": 1501},
277-
UNITS_NONE,
278-
np.float32,
279-
None,
280-
None,
277+
ds, "amplitude", {"inline": 345, "crossline": 188, "time": 1501}, UNITS_NONE, np.float32, True
281278
)
282279

283280
def test_inline_reads(self, teapot_mdio_tmp: Path) -> None:

tests/integration/testing_helpers.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ def validate_xr_variable( # noqa PLR0913
2424
dims: dict[int],
2525
units: AllUnitModel,
2626
data_type: np.dtype,
27-
expected_values: range | None,
27+
has_stats: bool = False,
28+
expected_values: range | None = None,
2829
actual_value_generator: Callable[[xr.DataArray], np.ndarray] | None = None,
2930
) -> None:
3031
"""Validate the properties of a variable in an Xarray dataset."""
@@ -47,10 +48,16 @@ def validate_xr_variable( # noqa PLR0913
4748
else:
4849
assert data_type == v.dtype
4950

50-
assert v.attrs.get("statsV1", None) is None, "StatsV1 should be empty for empty dataset variables"
51+
stats = v.attrs.get("statsV1", None)
52+
if has_stats:
53+
assert stats is not None, "StatsV1 should not be empty for dataset variables with stats"
54+
else:
55+
assert stats is None, "StatsV1 should be empty for dataset variables without stats"
5156

5257
if units is not None:
53-
assert v.attrs == {"unitsV1": units.model_dump(mode="json")}
58+
units_v1 = v.attrs.get("unitsV1", None)
59+
assert units_v1 is not None, "UnitsV1 should not be empty for dataset variables with units"
60+
assert units_v1 == units.model_dump(mode="json")
5461
else:
5562
assert "unitsV1" not in v.attrs, "UnitsV1 should not exist for unit-unaware variables"
5663

0 commit comments

Comments
 (0)