Skip to content

Commit a9d233e

Browse files
committed
fix: update DeltaCDUIManager initialization to support multiple netCDF file paths and improve GeoJSON handling
1 parent e5a149d commit a9d233e

2 files changed

Lines changed: 155 additions & 73 deletions

File tree

pydelmod/deltacduimgr.py

Lines changed: 154 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -11,115 +11,157 @@ class DeltaCDUIManager(tsdataui.TimeSeriesDataUIManager):
1111
Handles data catalog creation and time series extraction for area_id and crop combinations.
1212
"""
1313

14-
def __init__(self, nc_file_path, geojson_file_path=None):
14+
def __init__(self, *nc_file_paths, **kwargs):
1515
"""
1616
Initialize the DeltaCD UI Manager.
1717
1818
Parameters:
1919
-----------
20-
nc_file_path : str
21-
Path to the netCDF file containing DeltaCD data
22-
geojson_file_path : str, optional
20+
nc_file_paths : str or list of str
21+
Paths to the netCDF files containing DeltaCD data. Can be a single path or multiple paths.
22+
geojson_file : str, optional
2323
Path to the GeoJSON file containing geographical information for area_ids
2424
"""
25-
self.nc_file_path = nc_file_path
26-
self.geojson_file_path = geojson_file_path
27-
self.ds = xr.open_dataset(nc_file_path)
25+
self.nc_file_paths = nc_file_paths
26+
self.geojson_file_path = kwargs.pop("geojson_file", None)
27+
self.datasets = {}
28+
dfcats = []
29+
for nc_file_path in self.nc_file_paths:
30+
if not nc_file_path.endswith('.nc'):
31+
raise ValueError(f"Invalid file type: {nc_file_path}. Expected a netCDF file (.nc).")
32+
self.datasets[nc_file_path] = xr.open_dataset(nc_file_path)
33+
dfcat = self.get_data_catalog_for_dataset(self.datasets[nc_file_path], nc_file_path)
34+
dfcats.append(dfcat)
2835
self.gdf = None
29-
30-
if geojson_file_path:
31-
self.gdf = gpd.read_file(geojson_file_path)
32-
self.gdf.rename(columns={"OBJECTID": "area_id"}, inplace=True)
33-
36+
if self.geojson_file_path:
37+
self.gdf = gpd.read_file(self.geojson_file_path)
38+
self.gdf.rename(columns={"NEW_SUB": "area_id"}, inplace=True)
39+
40+
# concatenate all data catalogs
41+
dfcat = pd.concat(dfcats, ignore_index=True)
42+
# merge with GeoDataFrame if available
43+
# If geojson is available, convert to GeoDataFrame
44+
if self.gdf is not None:
45+
# Convert area_id to string in both DataFrames before merging
46+
dfcat['area_id'] = dfcat['area_id'].astype(str)
47+
gdf_copy = self.gdf.copy()
48+
gdf_copy['area_id'] = gdf_copy['area_id'].astype(str)
49+
50+
# Merge with geometry information based on area_id
51+
merged_df = pd.merge(dfcat, gdf_copy, on="area_id", how="left")
52+
53+
# Create GeoDataFrame
54+
catalog = gpd.GeoDataFrame(merged_df, geometry="geometry")
55+
56+
# Handle CRS properly
57+
if self.gdf.crs is not None:
58+
catalog.crs = self.gdf.crs
59+
else:
60+
catalog.set_crs(epsg=4326, inplace=True)
61+
else:
62+
# If no GeoDataFrame, just use the DataFrame
63+
catalog = dfcat
64+
self.dfcat = catalog
3465
# Initialize data cache
3566
self.data_cache = {}
3667

68+
kwargs['filename_column'] = "source"
69+
super().__init__(**kwargs)
3770
# Set up columns for visualization
38-
self.color_cycle_column = "crop"
71+
self.color_cycle_column = "area_id"
3972
self.dashed_line_cycle_column = "variable"
4073
self.marker_cycle_column = "area_id"
4174

42-
super().__init__(filename_column="source")
4375

4476
def get_data_catalog(self):
77+
return self.dfcat
78+
79+
def get_data_catalog_for_dataset(self, ds, nc_file_path):
4580
"""
4681
Create a data catalog from the netCDF file.
4782
Each row represents a time series for a variable for an area_id and crop combination.
4883
Includes all possible combinations of area_id, crop, and variable.
4984
"""
5085
# Get available variables, excluding coordinates
51-
variables = [var for var in self.ds.data_vars]
52-
area_ids = self.ds.area_id.values
53-
crops = self.ds.crop.values
86+
variables = list(ds.data_vars)
87+
dims = list(ds.dims)
88+
if "time" not in dims:
89+
raise ValueError(f"Dataset must contain a 'time' dimension. Dimensions available: {dims}")
90+
if "area_id" not in dims:
91+
raise ValueError(f"Dataset must contain an 'area_id' dimension. Dimensions available: {dims}")
92+
area_ids = ds.area_id.values
93+
other_dims = [dim for dim in dims if dim not in ["time", "area_id"]]
5494

5595
# Create all combinations using pandas products
5696
combinations = []
97+
columns = ['area_id']+other_dims
5798
for area_id in area_ids:
58-
for crop in crops:
99+
if 'crop' in dims:
100+
for crop in ds.crop.values:
101+
# Create combinations for each variable
102+
for var in variables:
103+
combinations.append({
104+
'area_id': area_id,
105+
'crop': crop,
106+
'variable': var
107+
})
108+
else:
59109
for var in variables:
60110
combinations.append({
61-
'area_id': int(area_id),
62-
'crop': str(crop),
111+
'area_id': area_id,
63112
'variable': var
64113
})
65114

66115
# Create base DataFrame with all combinations
67116
df = pd.DataFrame(combinations)
68117

69-
# Get time range and other metadata for each combination
70-
variable_units = {var: self.ds[var].attrs.get("units", "") for var in variables}
71-
72118
# Add additional columns
119+
# More robust way to get units
120+
variable_units = {}
121+
for var in variables:
122+
try:
123+
# Try to get unit from the variable's attributes
124+
unit = ds[var].attrs.get("units", "")
125+
variable_units[var] = unit
126+
print(f"Found unit '{unit}' for variable '{var}'")
127+
except Exception as e:
128+
print(f"Error getting unit for {var}: {e}")
129+
variable_units[var] = "" # Default to empty string
73130
df['unit'] = df['variable'].map(variable_units)
74131
df['interval'] = 'daily' # Assuming all data is daily, adjust as needed
75-
df['source'] = self.nc_file_path
132+
df['source'] = nc_file_path
76133

77134
# Add time range information
78-
times = pd.to_datetime(self.ds.time.values)
135+
times = pd.to_datetime(ds.time.values)
79136
df['start_year'] = str(times.min().year)
80137
df['max_year'] = str(times.max().year)
81-
82-
# If geojson is available, convert to GeoDataFrame
83-
if self.gdf is not None:
84-
# Merge with geometry information based on area_id
85-
merged_df = pd.merge(df, self.gdf, on="area_id", how="left")
86-
87-
# Create GeoDataFrame
88-
catalog = gpd.GeoDataFrame(merged_df, geometry="geometry")
89-
90-
# Handle CRS properly
91-
# Check if the GeoDataFrame already has a CRS
92-
if self.gdf.crs is not None:
93-
# GDF already has a CRS, no need to set it
94-
pass
95-
else:
96-
# Set a default CRS if none exists
97-
catalog.set_crs(epsg=4326, inplace=True)
98-
99-
return catalog
100-
else:
101-
return df
138+
return df
102139

103140
def get_time_range(self, dfcat):
104141
"""Return the min and max time from the dataset"""
105-
times = pd.to_datetime(self.ds.time.values)
106-
return times.min(), times.max()
142+
# return the min and max time for the dfcat
143+
starttime = pd.to_datetime(dfcat['start_year'].min())
144+
endtime = pd.to_datetime(dfcat['max_year'].max())
145+
return starttime, endtime
107146

108147
def build_station_name(self, r):
109148
"""Build a display name for the area_id and crop combination"""
149+
if 'crop' not in r or not r['crop'] or pd.isna(r['crop']):
150+
return f"Area {r['area_id']}"
110151
return f"Area {r['area_id']} - {r['crop']}"
111152

112-
def get_table_column_width_map(self):
153+
def _get_table_column_width_map(self):
113154
"""Define column widths for the data catalog table"""
114155
column_width_map = {
115156
"area_id": "8%",
116-
"crop": "15%",
117157
"variable": "12%",
118158
"unit": "8%",
119159
"interval": "10%",
120160
"start_year": "10%",
121161
"max_year": "10%",
122162
}
163+
if 'crop' in self.get_data_catalog().columns:
164+
column_width_map["crop"] = "15%"
123165
return column_width_map
124166

125167
def get_table_filters(self):
@@ -175,32 +217,47 @@ def get_data_for_time_range(self, r, time_range):
175217
Parameters:
176218
-----------
177219
r : pandas.Series
178-
Row from data catalog containing area_id, crop, and variable
220+
Row from data catalog containing area_id and variable (crop may be optional)
179221
time_range : tuple
180222
Start and end time for data extraction
181-
223+
182224
Returns:
183225
--------
184226
tuple
185227
(time series DataFrame, unit, data type)
186228
"""
187229
area_id = r["area_id"]
188-
crop = r["crop"]
189230
variable = r["variable"]
190231
unit = r["unit"]
232+
filename = r["source"]
233+
ds = self.datasets[filename]
234+
try:
235+
# Check if 'crop' exists in the row before trying to access it
236+
if 'crop' in r and not pd.isna(r['crop']):
237+
crop = r["crop"]
238+
# Extract data from xarray for the specific area_id, crop, and variable
239+
data = ds[variable].sel(area_id=area_id, crop=crop)
240+
else:
241+
# Handle case where crop is not in the data catalog
242+
data = ds[variable].sel(area_id=area_id)
191243

192-
# Extract data from xarray for the specific area_id, crop, and variable
193-
data = self.ds[variable].sel(area_id=area_id, crop=crop)
194-
195-
# Convert to pandas Series and then DataFrame
196-
df = data.to_pandas().to_frame()
244+
# Convert to pandas Series and then DataFrame
245+
df = data.to_pandas().to_frame()
246+
# Ensure the index is a datetime index
247+
if not isinstance(df.index, pd.DatetimeIndex):
248+
# Try to convert the index to a datetime index
249+
df.index = pd.to_datetime(df.index)
197250

198-
# Filter by time range if specified
199-
if time_range and len(time_range) == 2:
200-
start_time, end_time = time_range
201-
df = df.loc[start_time:end_time]
202-
203-
return df, unit, "instantaneous"
251+
# Filter by time range if specified
252+
if time_range and len(time_range) == 2:
253+
start_time, end_time = time_range
254+
df = df.loc[start_time:end_time]
255+
256+
return df, unit, "instantaneous"
257+
except Exception as e:
258+
# Handle any exception that occurs during data extraction
259+
print(f"Error extracting data for area_id={area_id}, variable={variable}: {e}")
260+
return pd.DataFrame(), unit, "instantaneous"
204261

205262
def get_tooltips(self):
206263
"""Define tooltips for map visualization"""
@@ -222,11 +279,23 @@ def get_map_marker_columns(self):
222279
def create_curve(self, df, r, unit, file_index=None):
223280
"""Create a holoviews curve for plotting"""
224281
file_index_label = f"{file_index}:" if file_index is not None else ""
225-
crvlabel = f'{file_index_label}Area {r["area_id"]} - {r["crop"]}: {r["variable"]}'
282+
283+
# Handle case where crop is missing or blank
284+
if 'crop' not in r or not r['crop'] or pd.isna(r['crop']):
285+
crvlabel = f'{file_index_label}Area {r["area_id"]}: {r["variable"]}'
286+
title = f'{r["variable"]} @ Area {r["area_id"]}'
287+
else:
288+
crvlabel = f'{file_index_label}Area {r["area_id"]} - {r["crop"]}: {r["variable"]}'
289+
title = f'{r["variable"]} for {r["crop"]} @ Area {r["area_id"]}'
290+
226291
ylabel = f'{r["variable"]} ({unit})'
227-
title = f'{r["variable"]} for {r["crop"]} @ Area {r["area_id"]}'
228292

229-
crv = hv.Curve(df.iloc[:, [0]], label=crvlabel).redim(value=crvlabel)
293+
# Create curve with appropriate data
294+
if df.empty:
295+
crv = hv.Curve(pd.DataFrame({'x': [], 'y': []}), kdims=['x'], vdims=['y'], label=crvlabel).redim(y=crvlabel)
296+
else:
297+
crv = hv.Curve(df, label=crvlabel).redim(value=crvlabel)
298+
230299
return crv.opts(
231300
xlabel="Time",
232301
ylabel=ylabel,
@@ -249,7 +318,14 @@ def append_to_title_map(self, title_map, unit, r):
249318
else:
250319
value = ["", ""]
251320
value[0] = self._append_value(r["variable"], value[0])
252-
value[1] = self._append_value(f'Area {r["area_id"]} - {r["crop"]}', value[1])
321+
322+
# Handle case where crop is missing or blank
323+
if 'crop' not in r or not r['crop'] or pd.isna(r['crop']):
324+
location_str = f'Area {r["area_id"]}'
325+
else:
326+
location_str = f'Area {r["area_id"]} - {r["crop"]}'
327+
328+
value[1] = self._append_value(location_str, value[1])
253329
title_map[unit] = value
254330

255331
def create_title(self, v):
@@ -259,19 +335,25 @@ def create_title(self, v):
259335

260336
import click
261337
@click.command()
262-
@click.argument("detaw_output_file", type=click.Path(exists=True, dir_okay=False))
338+
@click.argument(
339+
"nc_files",
340+
nargs=-1,
341+
type=click.Path(exists=True, dir_okay=False),
342+
required=True,
343+
)
263344
@click.option(
264-
"--geojson_file_path",
345+
"--geojson_file",
265346
type=click.Path(exists=True, dir_okay=False),
266347
default=None,
267348
help="Path to the GeoJSON file containing area geometries",
268349
)
269-
def show_deltacd_ui(detaw_output_file, geojson_file_path=None):
350+
def show_deltacd_ui(nc_files, geojson_file=None):
270351
"""
271352
Show the DeltaCD UI Manager for the specified netCDF file and GeoJSON file.
272353
"""
273-
dcd_ui = DeltaCDUIManager(detaw_output_file, geojson_file_path=geojson_file_path)
354+
dcd_ui = DeltaCDUIManager(*nc_files, geojson_file=geojson_file)
274355
from pydelmod.dvue import dataui
275356
import cartopy.crs as ccrs
276357
dui=dataui.DataUI(dcd_ui, station_id_column="area_id", crs=ccrs.epsg(26910))
277358
dui.create_view().servable(title="DeltaCD UI Manager").show()
359+

tests/ex_deltacdui.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
pn.extension()
2525
pn.panel(pn.Row(map_view, sizing_mode="stretch_both")).servable(title="GeoJSON Map with Background Tiles").show()
2626
# %%
27-
detaw_output_file = "d:/delta/deltacd_inputs/historical/outputs/detawoutput_dsm2.nc"
27+
detaw_output_file = "d:/delta/deltacd_inputs/historical/outputs/dcd_dsm2.nc"
2828
import xarray as xr
2929
# %%
3030
ds = xr.open_dataset(detaw_output_file)

0 commit comments

Comments
 (0)