Skip to content

Commit f358202

Browse files
committed
Update dropbox recipes for ccf water level
1 parent 0b91f6f commit f358202

2 files changed

Lines changed: 20 additions & 20 deletions

File tree

examples/dropbox/dropbox_spec_ccf_downele.yaml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ target_tz_label: PST
77

88

99
data:
10-
- name: ccf_ele
10+
- name: ccf_elev
1111
skip: False
1212
collect:
13-
file_pattern: "CC_Water_Levels_2024*.csv"
13+
file_pattern: "CC_Water_Levels_2025*.csv"
1414
wildcard: time_overlap # time_sharded | time_overlap | data | none.
1515
# If time_sharded, indicates non-overlapping timestamps that use year chunking.
1616
# If time_overlap, indicates wildcards are ordered by date, e.g. 20250531 (lexicographic order must equal chronological order)
@@ -20,14 +20,14 @@ data:
2020
recursive_search: False
2121
reader: read_ts # Names, pointers to code etc. To be fleshed out
2222
reader_args:
23-
names: ["datetime", "up_ele","down_ele"]
23+
names: ["datetime", "up_elev","down_elev"]
2424
na_values: ["(null)", "null", "NULL", ""]
2525
hint: "resort"
2626
force_regular: False
2727
merge_method: "ts_splice" # We will not call read_ts on the wildcard, but rather glob, read each file and create a list of dfs
2828
merge_args:
2929
transition: prefer_first
30-
selector: down_ele # Usually a column name. When read_last_resort_csv is used, use null.
30+
selector: down_elev # Usually a column name. When read_last_resort_csv is used, use null.
3131
transforms:
3232
- name: dst_tz # SCADA is delivered in local time. Often this is not needed for better data sources
3333
args:
@@ -42,13 +42,12 @@ data:
4242
heartbeat_freq: 60min
4343
metadata:
4444
station_id: clc
45-
structure_id: ccfb
46-
agency_id: ccfb
45+
agency_id: clc
4746
processor: dms
4847
agency: dwr
4948
source: dwr
5049
freq: None # None for irregular, "infer" for infer.
51-
param: elevation
50+
param: elev
5251
unit: ft
5352
subloc: down
5453
time_zone: ${target_tz}
@@ -58,16 +57,17 @@ data:
5857
# seems like this prevails, but we should prevent conflicts.
5958

6059
output:
60+
# repo_data_dir: ${modeling_data}/repo/continuous/proprietary/formatted # This is a practice directory so you don't go overwriting a real repo while developing the recipe
61+
# When omitted the target location is the repo_root as configured in dstore_config.yaml
6162
repo_name: proprietary_formatted # This is a pointer to an entry in dstore_config.yaml
6263
staging: # This is where the read and transformed time series will be staged.
6364
# It will be used to update the repo, but that uses update_repo and may not mean a full replacement
6465
dir: ${modeling_data}/repo/continuous/proprietary/staging
6566
write_args:
6667
float_format: "%0.3f"
67-
chunk_years: False
68+
chunk_years: True
6869
reconcile:
69-
repo_data_dir: ${modeling_data}/repo/continuous/proprietary/formatted # This is a practice directory so you don't go overwriting a real repo while developing the recipe
70-
# When omitted the target location is the repo_root as configured in dstore_config.yaml
70+
7171
prefer: staged # This is an argument to the update process:
7272
# - staged means "prefer the new stuff we staged"
7373
# - repo means "prefer the stuff in the repo, just top off with any new time stamps"

examples/dropbox/dropbox_spec_ccf_upele.yaml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ target_tz_label: PST
77

88

99
data:
10-
- name: ccf_ele
10+
- name: ccf_elev
1111
skip: False
1212
collect:
13-
file_pattern: "CC_Water_Levels_2024*.csv"
13+
file_pattern: "CC_Water_Levels_2025*.csv"
1414
wildcard: time_overlap # time_sharded | time_overlap | data | none.
1515
# If time_sharded, indicates non-overlapping timestamps that use year chunking.
1616
# If time_overlap, indicates wildcards are ordered by date, e.g. 20250531 (lexicographic order must equal chronological order)
@@ -20,14 +20,14 @@ data:
2020
recursive_search: False
2121
reader: read_ts # Names, pointers to code etc. To be fleshed out
2222
reader_args:
23-
names: ["datetime", "up_ele", "down_ele"]
23+
names: ["datetime", "up_elev", "down_elev"]
2424
na_values: ["(null)", "null", "NULL", ""]
2525
hint: "resort"
2626
force_regular: False
2727
merge_method: "ts_splice" # We will not call read_ts on the wildcard, but rather glob, read each file and create a list of dfs
2828
merge_args:
2929
transition: prefer_first
30-
selector: up_ele # Usually a column name. When read_last_resort_csv is used, use null.
30+
selector: up_elev # Usually a column name. When read_last_resort_csv is used, use null.
3131
transforms:
3232
- name: dst_tz # SCADA is delivered in local time. Often this is not needed for better data sources
3333
args:
@@ -42,13 +42,12 @@ data:
4242
heartbeat_freq: 60min
4343
metadata:
4444
station_id: clc
45-
structure_id: ccfb
46-
agency_id: ccfb
45+
agency_id: clc
4746
processor: dms
4847
agency: dwr
4948
source: dwr
5049
freq: None # None for irregular, "infer" for infer.
51-
param: elevation
50+
param: elev
5251
unit: ft
5352
subloc: up
5453
time_zone: ${target_tz}
@@ -59,15 +58,16 @@ data:
5958

6059
output:
6160
repo_name: proprietary_formatted # This is a pointer to an entry in dstore_config.yaml
61+
# repo_data_dir: ${modeling_data}/repo/continuous/proprietary/formatted # This is a practice directory so you don't go overwriting a real repo while developing the recipe
62+
# When omitted the target location is the repo_root as configured in dstore_config.yaml
6263
staging: # This is where the read and transformed time series will be staged.
6364
# It will be used to update the repo, but that uses update_repo and may not mean a full replacement
6465
dir: ${modeling_data}/repo/continuous/proprietary/staging
6566
write_args:
6667
float_format: "%0.3f"
67-
chunk_years: False
68+
chunk_years: True
6869
reconcile:
69-
repo_data_dir: ${modeling_data}/repo/continuous/proprietary/formatted # This is a practice directory so you don't go overwriting a real repo while developing the recipe
70-
# When omitted the target location is the repo_root as configured in dstore_config.yaml
70+
7171
prefer: staged # This is an argument to the update process:
7272
# - staged means "prefer the new stuff we staged"
7373
# - repo means "prefer the stuff in the repo, just top off with any new time stamps"

0 commit comments

Comments
 (0)