@@ -11,115 +11,157 @@ class DeltaCDUIManager(tsdataui.TimeSeriesDataUIManager):
1111 Handles data catalog creation and time series extraction for area_id and crop combinations.
1212 """
1313
14- def __init__ (self , nc_file_path , geojson_file_path = None ):
14+ def __init__ (self , * nc_file_paths , ** kwargs ):
1515 """
1616 Initialize the DeltaCD UI Manager.
1717
1818 Parameters:
1919 -----------
20- nc_file_path : str
21- Path to the netCDF file containing DeltaCD data
22- geojson_file_path : str, optional
20+ nc_file_paths : str or list of str
21+ Paths to the netCDF files containing DeltaCD data. Can be a single path or multiple paths.
22+ geojson_file : str, optional
2323 Path to the GeoJSON file containing geographical information for area_ids
2424 """
25- self .nc_file_path = nc_file_path
26- self .geojson_file_path = geojson_file_path
27- self .ds = xr .open_dataset (nc_file_path )
25+ self .nc_file_paths = nc_file_paths
26+ self .geojson_file_path = kwargs .pop ("geojson_file" , None )
27+ self .datasets = {}
28+ dfcats = []
29+ for nc_file_path in self .nc_file_paths :
30+ if not nc_file_path .endswith ('.nc' ):
31+ raise ValueError (f"Invalid file type: { nc_file_path } . Expected a netCDF file (.nc)." )
32+ self .datasets [nc_file_path ] = xr .open_dataset (nc_file_path )
33+ dfcat = self .get_data_catalog_for_dataset (self .datasets [nc_file_path ], nc_file_path )
34+ dfcats .append (dfcat )
2835 self .gdf = None
29-
30- if geojson_file_path :
31- self .gdf = gpd .read_file (geojson_file_path )
32- self .gdf .rename (columns = {"OBJECTID" : "area_id" }, inplace = True )
33-
36+ if self .geojson_file_path :
37+ self .gdf = gpd .read_file (self .geojson_file_path )
38+ self .gdf .rename (columns = {"NEW_SUB" : "area_id" }, inplace = True )
39+
40+ # concatenate all data catalogs
41+ dfcat = pd .concat (dfcats , ignore_index = True )
42+ # merge with GeoDataFrame if available
43+ # If geojson is available, convert to GeoDataFrame
44+ if self .gdf is not None :
45+ # Convert area_id to string in both DataFrames before merging
46+ dfcat ['area_id' ] = dfcat ['area_id' ].astype (str )
47+ gdf_copy = self .gdf .copy ()
48+ gdf_copy ['area_id' ] = gdf_copy ['area_id' ].astype (str )
49+
50+ # Merge with geometry information based on area_id
51+ merged_df = pd .merge (dfcat , gdf_copy , on = "area_id" , how = "left" )
52+
53+ # Create GeoDataFrame
54+ catalog = gpd .GeoDataFrame (merged_df , geometry = "geometry" )
55+
56+ # Handle CRS properly
57+ if self .gdf .crs is not None :
58+ catalog .crs = self .gdf .crs
59+ else :
60+ catalog .set_crs (epsg = 4326 , inplace = True )
61+ else :
62+ # If no GeoDataFrame, just use the DataFrame
63+ catalog = dfcat
64+ self .dfcat = catalog
3465 # Initialize data cache
3566 self .data_cache = {}
3667
68+ kwargs ['filename_column' ] = "source"
69+ super ().__init__ (** kwargs )
3770 # Set up columns for visualization
38- self .color_cycle_column = "crop "
71+ self .color_cycle_column = "area_id "
3972 self .dashed_line_cycle_column = "variable"
4073 self .marker_cycle_column = "area_id"
4174
42- super ().__init__ (filename_column = "source" )
4375
4476 def get_data_catalog (self ):
77+ return self .dfcat
78+
79+ def get_data_catalog_for_dataset (self , ds , nc_file_path ):
4580 """
4681 Create a data catalog from the netCDF file.
4782 Each row represents a time series for a variable for an area_id and crop combination.
4883 Includes all possible combinations of area_id, crop, and variable.
4984 """
5085 # Get available variables, excluding coordinates
51- variables = [var for var in self .ds .data_vars ]
52- area_ids = self .ds .area_id .values
53- crops = self .ds .crop .values
86+ variables = list (ds .data_vars )
87+ dims = list (ds .dims )
88+ if "time" not in dims :
89+ raise ValueError (f"Dataset must contain a 'time' dimension. Dimensions available: { dims } " )
90+ if "area_id" not in dims :
91+ raise ValueError (f"Dataset must contain an 'area_id' dimension. Dimensions available: { dims } " )
92+ area_ids = ds .area_id .values
93+ other_dims = [dim for dim in dims if dim not in ["time" , "area_id" ]]
5494
5595 # Create all combinations using pandas products
5696 combinations = []
97+ columns = ['area_id' ]+ other_dims
5798 for area_id in area_ids :
58- for crop in crops :
99+ if 'crop' in dims :
100+ for crop in ds .crop .values :
101+ # Create combinations for each variable
102+ for var in variables :
103+ combinations .append ({
104+ 'area_id' : area_id ,
105+ 'crop' : crop ,
106+ 'variable' : var
107+ })
108+ else :
59109 for var in variables :
60110 combinations .append ({
61- 'area_id' : int (area_id ),
62- 'crop' : str (crop ),
111+ 'area_id' : area_id ,
63112 'variable' : var
64113 })
65114
66115 # Create base DataFrame with all combinations
67116 df = pd .DataFrame (combinations )
68117
69- # Get time range and other metadata for each combination
70- variable_units = {var : self .ds [var ].attrs .get ("units" , "" ) for var in variables }
71-
72118 # Add additional columns
119+ # More robust way to get units
120+ variable_units = {}
121+ for var in variables :
122+ try :
123+ # Try to get unit from the variable's attributes
124+ unit = ds [var ].attrs .get ("units" , "" )
125+ variable_units [var ] = unit
126+ print (f"Found unit '{ unit } ' for variable '{ var } '" )
127+ except Exception as e :
128+ print (f"Error getting unit for { var } : { e } " )
129+ variable_units [var ] = "" # Default to empty string
73130 df ['unit' ] = df ['variable' ].map (variable_units )
74131 df ['interval' ] = 'daily' # Assuming all data is daily, adjust as needed
75- df ['source' ] = self . nc_file_path
132+ df ['source' ] = nc_file_path
76133
77134 # Add time range information
78- times = pd .to_datetime (self . ds .time .values )
135+ times = pd .to_datetime (ds .time .values )
79136 df ['start_year' ] = str (times .min ().year )
80137 df ['max_year' ] = str (times .max ().year )
81-
82- # If geojson is available, convert to GeoDataFrame
83- if self .gdf is not None :
84- # Merge with geometry information based on area_id
85- merged_df = pd .merge (df , self .gdf , on = "area_id" , how = "left" )
86-
87- # Create GeoDataFrame
88- catalog = gpd .GeoDataFrame (merged_df , geometry = "geometry" )
89-
90- # Handle CRS properly
91- # Check if the GeoDataFrame already has a CRS
92- if self .gdf .crs is not None :
93- # GDF already has a CRS, no need to set it
94- pass
95- else :
96- # Set a default CRS if none exists
97- catalog .set_crs (epsg = 4326 , inplace = True )
98-
99- return catalog
100- else :
101- return df
138+ return df
102139
103140 def get_time_range (self , dfcat ):
104141 """Return the min and max time from the dataset"""
105- times = pd .to_datetime (self .ds .time .values )
106- return times .min (), times .max ()
142+ # return the min and max time for the dfcat
143+ starttime = pd .to_datetime (dfcat ['start_year' ].min ())
144+ endtime = pd .to_datetime (dfcat ['max_year' ].max ())
145+ return starttime , endtime
107146
108147 def build_station_name (self , r ):
109148 """Build a display name for the area_id and crop combination"""
149+ if 'crop' not in r or not r ['crop' ] or pd .isna (r ['crop' ]):
150+ return f"Area { r ['area_id' ]} "
110151 return f"Area { r ['area_id' ]} - { r ['crop' ]} "
111152
112- def get_table_column_width_map (self ):
153+ def _get_table_column_width_map (self ):
113154 """Define column widths for the data catalog table"""
114155 column_width_map = {
115156 "area_id" : "8%" ,
116- "crop" : "15%" ,
117157 "variable" : "12%" ,
118158 "unit" : "8%" ,
119159 "interval" : "10%" ,
120160 "start_year" : "10%" ,
121161 "max_year" : "10%" ,
122162 }
163+ if 'crop' in self .get_data_catalog ().columns :
164+ column_width_map ["crop" ] = "15%"
123165 return column_width_map
124166
125167 def get_table_filters (self ):
@@ -175,32 +217,47 @@ def get_data_for_time_range(self, r, time_range):
175217 Parameters:
176218 -----------
177219 r : pandas.Series
178- Row from data catalog containing area_id, crop, and variable
220+ Row from data catalog containing area_id and variable (crop may be optional)
179221 time_range : tuple
180222 Start and end time for data extraction
181-
223+
182224 Returns:
183225 --------
184226 tuple
185227 (time series DataFrame, unit, data type)
186228 """
187229 area_id = r ["area_id" ]
188- crop = r ["crop" ]
189230 variable = r ["variable" ]
190231 unit = r ["unit" ]
232+ filename = r ["source" ]
233+ ds = self .datasets [filename ]
234+ try :
235+ # Check if 'crop' exists in the row before trying to access it
236+ if 'crop' in r and not pd .isna (r ['crop' ]):
237+ crop = r ["crop" ]
238+ # Extract data from xarray for the specific area_id, crop, and variable
239+ data = ds [variable ].sel (area_id = area_id , crop = crop )
240+ else :
241+ # Handle case where crop is not in the data catalog
242+ data = ds [variable ].sel (area_id = area_id )
191243
192- # Extract data from xarray for the specific area_id, crop, and variable
193- data = self .ds [variable ].sel (area_id = area_id , crop = crop )
194-
195- # Convert to pandas Series and then DataFrame
196- df = data .to_pandas ().to_frame ()
244+ # Convert to pandas Series and then DataFrame
245+ df = data .to_pandas ().to_frame ()
246+ # Ensure the index is a datetime index
247+ if not isinstance (df .index , pd .DatetimeIndex ):
248+ # Try to convert the index to a datetime index
249+ df .index = pd .to_datetime (df .index )
197250
198- # Filter by time range if specified
199- if time_range and len (time_range ) == 2 :
200- start_time , end_time = time_range
201- df = df .loc [start_time :end_time ]
202-
203- return df , unit , "instantaneous"
251+ # Filter by time range if specified
252+ if time_range and len (time_range ) == 2 :
253+ start_time , end_time = time_range
254+ df = df .loc [start_time :end_time ]
255+
256+ return df , unit , "instantaneous"
257+ except Exception as e :
258+ # Handle any exception that occurs during data extraction
259+ print (f"Error extracting data for area_id={ area_id } , variable={ variable } : { e } " )
260+ return pd .DataFrame (), unit , "instantaneous"
204261
205262 def get_tooltips (self ):
206263 """Define tooltips for map visualization"""
@@ -222,11 +279,23 @@ def get_map_marker_columns(self):
222279 def create_curve (self , df , r , unit , file_index = None ):
223280 """Create a holoviews curve for plotting"""
224281 file_index_label = f"{ file_index } :" if file_index is not None else ""
225- crvlabel = f'{ file_index_label } Area { r ["area_id" ]} - { r ["crop" ]} : { r ["variable" ]} '
282+
283+ # Handle case where crop is missing or blank
284+ if 'crop' not in r or not r ['crop' ] or pd .isna (r ['crop' ]):
285+ crvlabel = f'{ file_index_label } Area { r ["area_id" ]} : { r ["variable" ]} '
286+ title = f'{ r ["variable" ]} @ Area { r ["area_id" ]} '
287+ else :
288+ crvlabel = f'{ file_index_label } Area { r ["area_id" ]} - { r ["crop" ]} : { r ["variable" ]} '
289+ title = f'{ r ["variable" ]} for { r ["crop" ]} @ Area { r ["area_id" ]} '
290+
226291 ylabel = f'{ r ["variable" ]} ({ unit } )'
227- title = f'{ r ["variable" ]} for { r ["crop" ]} @ Area { r ["area_id" ]} '
228292
229- crv = hv .Curve (df .iloc [:, [0 ]], label = crvlabel ).redim (value = crvlabel )
293+ # Create curve with appropriate data
294+ if df .empty :
295+ crv = hv .Curve (pd .DataFrame ({'x' : [], 'y' : []}), kdims = ['x' ], vdims = ['y' ], label = crvlabel ).redim (y = crvlabel )
296+ else :
297+ crv = hv .Curve (df , label = crvlabel ).redim (value = crvlabel )
298+
230299 return crv .opts (
231300 xlabel = "Time" ,
232301 ylabel = ylabel ,
@@ -249,7 +318,14 @@ def append_to_title_map(self, title_map, unit, r):
249318 else :
250319 value = ["" , "" ]
251320 value [0 ] = self ._append_value (r ["variable" ], value [0 ])
252- value [1 ] = self ._append_value (f'Area { r ["area_id" ]} - { r ["crop" ]} ' , value [1 ])
321+
322+ # Handle case where crop is missing or blank
323+ if 'crop' not in r or not r ['crop' ] or pd .isna (r ['crop' ]):
324+ location_str = f'Area { r ["area_id" ]} '
325+ else :
326+ location_str = f'Area { r ["area_id" ]} - { r ["crop" ]} '
327+
328+ value [1 ] = self ._append_value (location_str , value [1 ])
253329 title_map [unit ] = value
254330
255331 def create_title (self , v ):
@@ -259,19 +335,25 @@ def create_title(self, v):
259335
260336import click
261337@click .command ()
262- @click .argument ("detaw_output_file" , type = click .Path (exists = True , dir_okay = False ))
338+ @click .argument (
339+ "nc_files" ,
340+ nargs = - 1 ,
341+ type = click .Path (exists = True , dir_okay = False ),
342+ required = True ,
343+ )
263344@click .option (
264- "--geojson_file_path " ,
345+ "--geojson_file " ,
265346 type = click .Path (exists = True , dir_okay = False ),
266347 default = None ,
267348 help = "Path to the GeoJSON file containing area geometries" ,
268349)
269- def show_deltacd_ui (detaw_output_file , geojson_file_path = None ):
350+ def show_deltacd_ui (nc_files , geojson_file = None ):
270351 """
271352 Show the DeltaCD UI Manager for the specified netCDF file and GeoJSON file.
272353 """
273- dcd_ui = DeltaCDUIManager (detaw_output_file , geojson_file_path = geojson_file_path )
354+ dcd_ui = DeltaCDUIManager (* nc_files , geojson_file = geojson_file )
274355 from pydelmod .dvue import dataui
275356 import cartopy .crs as ccrs
276357 dui = dataui .DataUI (dcd_ui , station_id_column = "area_id" , crs = ccrs .epsg (26910 ))
277358 dui .create_view ().servable (title = "DeltaCD UI Manager" ).show ()
359+
0 commit comments