|
| 1 | +"""Command line tool to splice two NetCDF time series of nudging data""" |
| 2 | + |
| 3 | +import click |
| 4 | +import numpy as np |
| 5 | +import pandas as pd |
| 6 | +import xarray as xr |
| 7 | +from vtools.functions.merge import ts_splice |
| 8 | + |
| 9 | + |
def load_nc_series(nc_file, start_date):
    """
    Read the tracer field of a NetCDF file into a time-indexed DataFrame.

    Parameters
    ----------
    nc_file : str
        Path of the NetCDF file to open.
    start_date : str
        Date that ``time == 0`` in the file corresponds to.

    Returns
    -------
    df : pandas.DataFrame
        One row per time step, indexed by datetime. All remaining axes of
        ``tracer_concentration`` are flattened into the columns.
    ds : xarray.Dataset
        The dataset as opened, handed back so callers can use it as a
        template. It is not closed here; the caller owns the handle.
    """

    dataset = xr.open_dataset(nc_file)

    # The stored time values are interpreted as second offsets from
    # start_date.
    # NOTE(review): assumes xarray leaves "time" as plain numbers (no CF
    # "since" units that would be decoded to datetimes) -- confirm against
    # the files this is run on.
    raw_time = dataset["time"].values
    origin = pd.to_datetime(start_date)
    timestamps = origin + pd.to_timedelta(raw_time, unit="s")

    tracer = dataset["tracer_concentration"].values
    n_steps = tracer.shape[0]

    # Keep the leading axis as rows and collapse everything else so the
    # field can be handled as an ordinary table.
    frame = pd.DataFrame(tracer.reshape(n_steps, -1), index=timestamps)

    return frame, dataset
| 44 | + |
| 45 | + |
def rebuild_dataset(spliced_df, template_ds, start_date):
    """
    Convert a spliced DataFrame back into an xarray Dataset.

    Parameters
    ----------
    spliced_df : pandas.DataFrame
        Spliced time series data, one row per time step, with the
        ``(node, nLevels, one)`` axes flattened into the columns.
    template_ds : xarray.Dataset
        Template dataset. It supplies the ``node``, ``nLevels`` and ``one``
        dimension lengths, the ``map_to_global_node`` variable, and the
        attributes of ``time`` and ``tracer_concentration``.
    start_date : str
        Start date corresponding to time=0.

    Returns
    -------
    xarray.Dataset
        Dataset holding ``time`` (seconds since ``start_date``),
        ``map_to_global_node`` and ``tracer_concentration``. Dataset-level
        attributes of the template are not copied.

    Raises
    ------
    KeyError
        If the template lacks one of the expected dimensions or variables.
    ValueError
        If the row width of ``spliced_df`` does not match
        ``node * nLevels * one``.
    """

    start = pd.to_datetime(start_date)

    # NOTE(review): float32 has a 24-bit significand, so whole-second
    # offsets beyond 2**24 s (about 194 days) are no longer exactly
    # representable. Left unchanged because the dtype expected by the
    # consumer of this file is not visible here -- confirm.
    time_seconds = (spliced_df.index - start).total_seconds().astype(np.float32)

    # Dataset.sizes is the name -> length mapping; indexing Dataset.dims as
    # a mapping is deprecated in recent xarray releases.
    sizes = template_ds.sizes
    node = sizes["node"]
    levels = sizes["nLevels"]
    one = sizes["one"]

    # Undo the flattening: rows stay the time axis, columns are unfolded
    # back into (node, nLevels, one).
    data = spliced_df.values.reshape(len(time_seconds), node, levels, one)

    ds_out = xr.Dataset()

    ds_out["time"] = xr.DataArray(
        time_seconds, dims=["time"], attrs=template_ds["time"].attrs
    )

    ds_out["map_to_global_node"] = template_ds["map_to_global_node"]

    ds_out["tracer_concentration"] = xr.DataArray(
        data,
        dims=["time", "node", "nLevels", "one"],
        attrs=template_ds["tracer_concentration"].attrs,
    )

    return ds_out
| 90 | + |
| 91 | + |
def splice_two_files(
    file1, file2, start_date1, start_date2, transition_date, transition
):
    """
    Splice two NetCDF time series.

    Both files are read with ``load_nc_series``, merged with ``ts_splice``
    and then re-cut at ``transition_date``. The result is rebuilt on the
    layout of the first file, with time measured from ``start_date1``.

    Parameters
    ----------
    file1 : str
        First NetCDF file. Also used as the template for the output.
    file2 : str
        Second NetCDF file.
    start_date1 : str
        Start date of first dataset.
    start_date2 : str
        Start date of second dataset.
    transition_date : str
        Timestamp where splice occurs.
    transition : {'prefer_first', 'prefer_last'}
        Transition rule passed to ts_splice. It also selects the re-cut:

        * ``'prefer_first'``: the ts_splice result up to ``transition_date``,
          completed with file2 data from ``transition_date`` onward.
        * ``'prefer_last'``: file1 data up to ``transition_date``, completed
          with the ts_splice result from ``transition_date`` onward.

        Any other value leaves the ts_splice result as it is.

    Returns
    -------
    xarray.Dataset
        Spliced dataset, fully loaded into memory so that it no longer
        depends on the input files being open.
    """

    df1, ds1 = load_nc_series(file1, start_date1)
    try:
        df2, ds2 = load_nc_series(file2, start_date2)
        # Only the DataFrame of the second file is used from here on, and it
        # already holds the values in memory, so release the handle now.
        ds2.close()

        spliced = ts_splice([df1, df2], transition=transition)

        transition_ts = pd.to_datetime(transition_date)

        # combine_first keeps the left frame's values and only fills the
        # rows/values it is missing from the right frame.
        if transition == "prefer_first":
            spliced = spliced.loc[:transition_ts].combine_first(
                df2.loc[transition_ts:]
            )
        elif transition == "prefer_last":
            spliced = df1.loc[:transition_ts].combine_first(
                spliced.loc[transition_ts:]
            )

        # The rebuilt dataset borrows map_to_global_node from the template;
        # load it into memory before the template's file is closed below.
        return rebuild_dataset(spliced, ds1, start_date1).load()
    finally:
        ds1.close()
| 135 | + |
| 136 | + |
@click.command()
@click.option("--file1", required=True, help="Path to first NetCDF file.")
@click.option("--file2", required=True, help="Path to second NetCDF file.")
@click.option(
    "--start-date1", required=True, help="Start date corresponding to time=0 in file1."
)
@click.option(
    "--start-date2", required=True, help="Start date corresponding to time=0 in file2."
)
@click.option("--transition-date", required=True, help="Date where splicing occurs.")
@click.option(
    "--transition",
    default="prefer_last",
    type=click.Choice(["prefer_first", "prefer_last"]),
    help="Transition rule for ts_splice.",
)
@click.option(
    "--output-dir", required=True, help="Directory to write spliced NetCDF file."
)
@click.option("--output-name", default=None, help="Optional output filename.")
def splice_netcdf_cli(
    file1,
    file2,
    start_date1,
    start_date2,
    transition_date,
    transition,
    output_dir,
    output_name,
):
    """
    Splice two NetCDF datasets containing tracer time series.

    The output starts at the beginning of file1 and ends at the
    end of file2. The transition_date marks the splice boundary.
    """
    # The docstring above doubles as the command's --help text, so it is
    # kept short and user-facing; implementation notes live in comments.
    import os

    # Do the splice first so nothing is created on disk if it fails.
    result = splice_two_files(
        file1, file2, start_date1, start_date2, transition_date, transition
    )

    os.makedirs(output_dir, exist_ok=True)

    # Fall back to a fixed filename when none was given on the command line.
    outfile = os.path.join(output_dir, output_name or "spliced_output.nc")

    result.to_netcdf(outfile)

    print("Wrote:", outfile)


# Run the command only when the file is executed as a script.
if __name__ == "__main__":
    splice_netcdf_cli()
0 commit comments