Skip to content

Commit d5d2874

Browse files
Achaadpriit111
andauthored
feat: implement envisat gtc (#44)
* feat: implement envisat gtc * ci: update tox dependencies * tests: fix test_xarray_backends.py * Added direct parsing of Envisat files. This corrects slant range mistake from previous version for terrain correction and it is required for further development. * fix: small import error * style: code style improvements * style: code style improvements * style: code style improvements * style: code style improvements * tests: fix tests * Fix typing * A bit of code style changes * Fix sonarqube issues --------- Co-authored-by: Anton Perepelenko <Anton.Perepelenko@cgi.com> Co-authored-by: Priit Pender <priit@alloca.ee>
1 parent 8c54620 commit d5d2874

8 files changed

Lines changed: 229 additions & 21 deletions

File tree

asar_xarray/asar.py

Lines changed: 124 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,21 @@
33
import os
44
from typing import Dict, Any
55

6+
import pandas as pd
67
import xarray as xr
78
import numpy as np
89
from osgeo import gdal
910
from xarray.backends import AbstractDataStore
1011
from xarray.core.types import ReadBuffer
11-
from asar_xarray import reader, utils
12+
from asar_xarray import reader, utils, envisat_direct
1213
from loguru import logger
1314

1415
from asar_xarray.derived_subdatasets_metadata import process_derived_subdatasets_metadata
1516
from asar_xarray.general_metadata import process_general_metadata
1617
from asar_xarray.records_metadata import process_records_metadata
1718

1819

19-
def get_attributes(gdal_dataset: gdal.Dataset) -> Dict[str, Any]:
20+
def get_metadata(gdal_dataset: gdal.Dataset) -> Dict[str, Any]:
2021
"""
2122
Build xarray attributes from gdal dataset to be used in xarray.
2223
@@ -54,24 +55,110 @@ def open_asar_dataset(filepath: str | os.PathLike[Any] | ReadBuffer[Any] | Abstr
5455
gdal_dataset: gdal.Dataset = reader.get_gdal_dataset(filepath)
5556

5657
# Extract metadata attributes
57-
attributes = get_attributes(gdal_dataset)
58+
metadata = get_metadata(gdal_dataset)
5859

59-
# Read pixel data from the dataset
60-
data = gdal_dataset.ReadAsArray()
60+
# Duplicate, read directly from file, as gdal does not parse some necessary metadata
61+
metadata["direct_parse"] = envisat_direct.parse_direct(filepath)
6162

6263
# Create an xarray Dataset with pixel data and metadata attributes
63-
dataset: xr.Dataset = xr.Dataset(
64-
data_vars={'pixel_values': (('y', 'x'), data)},
65-
coords={
66-
'x': np.arange(data.shape[1]),
67-
'y': np.arange(data.shape[0])
68-
},
69-
attrs=attributes
70-
)
64+
dataset: xr.Dataset = create_dataset(metadata, filepath)
7165

7266
return dataset
7367

7468

69+
def create_dataset(metadata: dict[str, Any], filepath: str) -> xr.Dataset:
70+
"""
71+
Create an xarray Dataset from ASAR metadata and file path.
72+
73+
This function constructs the coordinates, attributes, and data variables
74+
for an ASAR product, using the provided metadata and file path.
75+
76+
:param metadata: Dictionary containing ASAR product metadata.
77+
:param filepath: Path to the ASAR dataset file.
78+
:return: An xarray Dataset with pixel data, coordinates, and attributes.
79+
"""
80+
number_of_samples = metadata["line_length"]
81+
product_first_line_utc_time = metadata["first_line_time"]
82+
product_last_line_utc_time = metadata["last_line_time"]
83+
print(product_first_line_utc_time)
84+
85+
number_of_lines = metadata["records"]["main_processing_params"]["num_output_lines"]
86+
azimuth_time_interval = 1 / metadata["records"]["main_processing_params"]["image_parameters"]["prf_value"][0]
87+
range_sampling_rate = metadata["records"]["main_processing_params"]["range_samp_rate"]
88+
image_slant_range_time = metadata["direct_parse"]["slant_time_first"] * 1e-9
89+
90+
number_of_bursts = 0
91+
92+
attrs = {
93+
"family_name": "Envisat",
94+
"number": 1,
95+
"mode": 1,
96+
"swaths": metadata["swath"],
97+
"orbit_number": metadata["abs_orbit"],
98+
"relative_orbit_number": metadata["rel_orbit"],
99+
"pass": metadata["pass"],
100+
"transmitter_receiver_polarisations": metadata["mds1_tx_rx_polar"],
101+
"product_type": "SLC",
102+
"start_time": product_first_line_utc_time,
103+
"stop_time": product_last_line_utc_time,
104+
105+
"radar_frequency": metadata["records"]["main_processing_params"]["radar_freq"] / 1e9,
106+
"ascending_node_time": "",
107+
"azimuth_pixel_spacing": metadata["records"]["main_processing_params"]["azimuth_spacing"],
108+
"range_pixel_spacing": metadata["records"]["main_processing_params"]["range_samp_rate"],
109+
"product_first_line_utc_time": product_first_line_utc_time,
110+
"product_last_line_utc_time": product_last_line_utc_time,
111+
"azimuth_time_interval": azimuth_time_interval,
112+
"image_slant_range_time": image_slant_range_time,
113+
"range_sampling_rate": range_sampling_rate,
114+
"incidence_angle_mid_swath": metadata["direct_parse"]["incidence_angle_center"],
115+
"metadata": metadata
116+
}
117+
118+
azimuth_time = compute_azimuth_time(
119+
product_first_line_utc_time, product_last_line_utc_time, number_of_lines
120+
)
121+
122+
if number_of_bursts == 0:
123+
swap_dims = {"line": "azimuth_time", "pixel": "slant_range_time"}
124+
else:
125+
raise NotImplementedError(
126+
"Burst processing is not implemented yet."
127+
)
128+
129+
coords: dict[str, Any] = {
130+
"pixel": np.arange(0, number_of_samples, dtype=int),
131+
"line": np.arange(0, number_of_lines, dtype=int),
132+
# set "units" explicitly as CF conventions don't support "nanoseconds".
133+
# See: https://github.com/pydata/xarray/issues/4183#issuecomment-685200043
134+
"azimuth_time": (
135+
"line",
136+
azimuth_time,
137+
{},
138+
{"units": f"microseconds since {azimuth_time[0]}"},
139+
),
140+
}
141+
142+
slant_range_time = np.linspace(
143+
image_slant_range_time,
144+
image_slant_range_time + (number_of_samples - 1) / range_sampling_rate,
145+
number_of_samples,
146+
)
147+
coords["slant_range_time"] = ("pixel", slant_range_time)
148+
149+
data = xr.open_dataarray(filepath, engine='rasterio')
150+
data.encoding.clear()
151+
data = data.squeeze("band").drop_vars(["band", "spatial_ref"])
152+
data = data.rename({"y": "line", "x": "pixel"})
153+
data = data.assign_coords(coords)
154+
data = data.swap_dims(swap_dims)
155+
156+
data.attrs.update(attrs)
157+
data.encoding.update({})
158+
159+
return xr.Dataset(attrs=attrs, data_vars={"measurements": data})
160+
161+
75162
def get_chirp_parameters(dataset: gdal.Dataset) -> dict[str, Any]:
76163
"""
77164
Parse dataset metadata for chirp parameters.
@@ -134,3 +221,27 @@ def get_chirp_cal_pulse_info(metadata: dict[str, str]) -> list[dict[str, Any]]:
134221

135222
# Convert the dictionary of dictionaries to the list of dictionaries
136223
return list(cal_info_dict.values())
224+
225+
226+
def compute_azimuth_time(
227+
product_first_line_utc_time: np.datetime64,
228+
product_last_line_utc_time: np.datetime64,
229+
number_of_lines: int
230+
) -> np.ndarray:
231+
"""
232+
Compute an array of azimuth times for each line in the ASAR product.
233+
234+
This function generates a sequence of evenly spaced time values between the
235+
first and last line UTC times, corresponding to the number of lines in the product.
236+
237+
:param product_first_line_utc_time: UTC time of the first line (as np.datetime64).
238+
:param product_last_line_utc_time: UTC time of the last line (as np.datetime64).
239+
:param number_of_lines: Total number of lines in the product.
240+
:return: Numpy array of azimuth times for each line.
241+
"""
242+
azimuth_time = pd.date_range(
243+
start=product_first_line_utc_time,
244+
end=product_last_line_utc_time,
245+
periods=number_of_lines
246+
)
247+
return np.asarray(azimuth_time.values, dtype='datetime64[ns]')

asar_xarray/envisat_direct.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""Module for parsing Envisat direct access data structures and extracting metadata."""
2+
import struct
3+
from typing import Any
4+
5+
6+
def parse_int(s: str) -> int:
7+
"""Parse an integer value from a string representation of a field."""
8+
s = s.replace("<bytes>", "")
9+
s = s[s.index("=") + 1:]
10+
# print(s)
11+
return int(s)
12+
13+
14+
class EnvisatADS:
15+
"""Class representing an Envisat Annotation Data Set (ADS) descriptor."""
16+
17+
def __init__(self, buffer: bytes) -> None:
18+
"""Initialize the ADS descriptor from a buffer of bytes."""
19+
str_arr = buffer.decode("ascii").split("\n")
20+
name = str_arr[0].replace("DS_NAME=\"", "").replace("\"", "").strip()
21+
self.name = name
22+
self.num = parse_int(str_arr[5])
23+
self.size = parse_int(str_arr[4])
24+
self.offset = parse_int(str_arr[3])
25+
26+
def __str__(self) -> str:
27+
"""Return a string representation of the ADS descriptor."""
28+
return "Envisat ADS: \"{}\" {} {} {}".format(self.name, self.offset, self.size, self.num)
29+
30+
31+
def parse_direct(path: str) -> dict[str, Any]:
32+
"""
33+
Parse an Envisat product file and extract relevant metadata fields.
34+
35+
Args:
36+
----
37+
path (str): Path to the Envisat product file.
38+
39+
returns: Dictionary containing extracted metadata fields.
40+
"""
41+
metadata = {}
42+
file_buffer = None
43+
with open(path, "rb") as fp:
44+
file_buffer = fp.read()
45+
46+
# read main product header and confirm sph size location
47+
mph_size = 1247
48+
mph_str = file_buffer[0:mph_size].decode("ascii")
49+
assert (mph_str.index("SPH_SIZE") == 1104)
50+
sph_size = parse_int(mph_str[1104:1104 + 20])
51+
52+
sph_buf = file_buffer[mph_size:mph_size + sph_size]
53+
dsd_size = 280
54+
dsd_num = 18
55+
dsd_buf = sph_buf[sph_size - dsd_size * dsd_num:]
56+
57+
for i in range(dsd_num):
58+
ads = EnvisatADS(dsd_buf[i * dsd_size:(i + 1) * dsd_size])
59+
if ads.name == "GEOLOCATION GRID ADS":
60+
rec_size = 521
61+
assert ((ads.size // ads.num) == rec_size)
62+
geoloc_buf = file_buffer[ads.offset:ads.offset + ads.size]
63+
middle_idx = ads.num // 2
64+
geoloc_record = geoloc_buf[middle_idx * rec_size: (middle_idx + 1) * rec_size]
65+
# Geolocation Grid ADSRs header
66+
header_size = 12 + 1 + 4 + 4 + 4
67+
# tiepoints, 11 of big endian floats for each of the following:
68+
# samp numbers, slant range times, angles, lats, longs
69+
block_size = 11 * 4
70+
slant_time_offset = header_size + 1 * block_size
71+
incidence_angle_offset = header_size + 2 * block_size
72+
# adjust to middle of 11
73+
incidence_angle_offset += 5 * 4
74+
75+
slant_time_first = struct.unpack(">f", geoloc_record[slant_time_offset:slant_time_offset + 4])[0]
76+
incidence_angle_middle = \
77+
struct.unpack(">f", geoloc_record[incidence_angle_offset:incidence_angle_offset + 4])[0]
78+
79+
metadata["slant_time_first"] = slant_time_first
80+
metadata["incidence_angle_center"] = incidence_angle_middle
81+
82+
if ads.name == "MAIN PROCESSING PARAMS ADS":
83+
84+
main_processing_params_buf = file_buffer[ads.offset:ads.offset + ads.size]
85+
if len(main_processing_params_buf) == 10069:
86+
sigma_buf = main_processing_params_buf[2029:2029 + 4020]
87+
gammma_buf = main_processing_params_buf[2029 + 4020:]
88+
89+
metadata["sigma_calib_vector"] = sigma_buf
90+
metadata["gamma_calib_vector"] = gammma_buf
91+
92+
return metadata

asar_xarray/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@
77
test_file = ('../tests/resources'
88
'/ASA_IMS_1PNESA20040109_194924_000000182023_00157_09730_0000.N1')
99
dataset = xr.open_dataset(test_file, engine='asar')
10-
print(dataset)
10+
print(dataset.attrs)

environment.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,8 @@ dependencies:
1414
- lxml>=5.3.0
1515
- mypy>=1.15.0
1616
- loguru>=0.7.0
17+
- pandas-stubs >=2.0.0
1718
- pip:
1819
- pytest-stub>=1.1.0
20+
- rasterio>=1.4.0
21+
- rioxarray>=0.19.0

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ classifiers = [
1717
'Topic :: Scientific/Engineering :: GIS'
1818
]
1919
readme = 'README.md'
20-
license = { text = 'Apache-2.0' }
20+
license-files = ['LICENSE']
2121
keywords = ['ASAR', 'ENVISAT', 'xarray', 'earth-observation', 'remote-sensing', 'radar', 'satellite-imagery', 'sar',
2222
'synthetic-aperture-radar']
2323
requires-python = '>=3.12'

tests/test_asar.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@ def test_open_asar_dataset(resources_dir: str) -> None:
77
filepath = os.path.join(resources_dir, 'ASA_IMS_1PNESA20040109_194924_000000182023_00157_09730_0000.N1')
88
dataset = asar.open_asar_dataset(filepath)
99
assert dataset is not None
10-
assert dataset.dims['x'] == 5174
11-
assert dataset.dims['y'] == 30181
12-
assert dataset.data_vars['pixel_values'].data.shape == (30181, 5174)
10+
assert dataset.dims['slant_range_time'] == 5174
11+
assert dataset.dims['azimuth_time'] == 30181
12+
assert dataset.data_vars['measurements'].data.shape == (30181, 5174)

tests/test_xaray_backends.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@ def test_open_dataset(resources_dir: str) -> None:
99
file = os.path.join(resources_dir, 'ASA_IMS_1PNESA20040109_194924_000000182023_00157_09730_0000.N1')
1010
dataset = xr.open_dataset(file, engine='asar')
1111
assert dataset is not None
12-
assert dataset.dims['x'] == 5174
13-
assert dataset.dims['y'] == 30181
14-
assert dataset.data_vars['pixel_values'].data.shape == (30181, 5174)
12+
assert dataset.dims['slant_range_time'] == 5174
13+
assert dataset.dims['azimuth_time'] == 30181
14+
assert dataset.data_vars['measurements'].data.shape == (30181, 5174)

tox.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ deps =
1919
pytest
2020
pytest-cov
2121
mypy
22+
rasterio>=1.4.0
23+
rioxarray>=0.19.0
2224
commands =
2325
coverage run -m pytest -s
2426
coverage xml

0 commit comments

Comments
 (0)