You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
dropbox_home: temp/dropbox # This is a placeholder. In practice, this would be set to the actual path of the dropbox home directory on the user's system.
4
+
modeling_data: //cnrastore-bdo/Modeling_Data/
5
+
target_tz: "Etc/GMT+8" # Looks odd, but POSIX-style Etc/ zone names invert the sign: Etc/GMT+8 is UTC-8, i.e. fixed PST with no DST (as used by pandas).
6
+
target_tz_label: PST
7
+
8
+
9
+
data:
10
+
- name: ccf_ele
11
+
skip: False
12
+
collect:
13
+
file_pattern: "CC_Water_Levels_2024*.csv"
14
+
wildcard: time_overlap # time_sharded | time_overlap | data | none.
15
+
# If time_sharded, indicates non-overlapping timestamps that use year chunking.
16
+
# If time_overlap, indicates wildcard matches are ordered by date, e.g. 20250531 (lexicographic order must equal chronological order).
17
+
# If data, represents different station/variable. Look for metadata in the file and station lists.
18
+
# If none, a wildcard is an error. # todo: test
19
+
location: "${modeling_data}/hydraulic_structures/incoming" # This could be understood as a default
20
+
recursive_search: False
21
+
reader: read_ts # Names, pointers to code etc. To be fleshed out
22
+
reader_args:
23
+
names: ["datetime", "up_ele","down_ele"]
24
+
na_values: ["(null)", "null", "NULL", ""]
25
+
hint: "resort"
26
+
force_regular: False
27
+
merge_method: "ts_splice" # We will not call read_ts on the wildcard, but rather glob, read, and create a list of dfs
28
+
merge_args:
29
+
transition: prefer_first
30
+
selector: down_ele # Usually a column name. When read_last_resort_csv is used, use null.
31
+
transforms:
32
+
- name: dst_tz # SCADA is delivered local time. Often this is not needed from better data sources
33
+
args:
34
+
src_tz: US/Pacific
35
+
target_tz: ${target_tz}
36
+
- name: coarsen
37
+
args:
38
+
grid: 2min
39
+
preserve_vals: [0.0]
40
+
qwidth: 0.01
41
+
hyst: 0.5
42
+
heartbeat_freq: 60min
43
+
metadata:
44
+
station_id: clc
45
+
structure_id: ccfb
46
+
agency_id: ccfb
47
+
processor: dms
48
+
agency: dwr
49
+
source: dwr
50
+
freq: None # None for irregular, "infer" for infer.
51
+
param: elevation
52
+
unit: ft
53
+
subloc: down
54
+
time_zone: ${target_tz}
55
+
time_zone_label: ${target_tz_label}
56
+
latitude: registry_lookup # projected will be inferred. User changes should be made to projected, not latitude/longitude.
57
+
longitude: registry_lookup # What happens when this and the registry entry have conflicting metadata for something like latitude?
58
+
# seems like this prevails, but we should prevent conflicts.
59
+
60
+
output:
61
+
repo_name: proprietary_formatted # This is a pointer to an entry in dstore_config.yaml
62
+
staging: # This is where the read and transformed time series will be staged.
63
+
# It will be used to update the repo, but that uses update_repo and may not mean a full replacement
repo_data_dir: ${modeling_data}/repo/continuous/proprietary/formatted # This is a practice directory so you don't go overwriting a real repo while developing the recipe
70
+
# When omitted the target location is the repo_root as configured in dstore_config.yaml
71
+
prefer: staged # This is an argument to the update process:
72
+
# - staged means "prefer the new stuff we staged"
73
+
# - repo means "prefer the stuff in the repo, just top off with any new time stamps"
74
+
allow_new_series: true
75
+
inspection: # update parameters. These should be included but seldom change
76
+
recent_years: 3 # Data up to 3 years old will be checked whenever an update is performed
77
+
p3: 0.15 # probability of a spot check for data 3-10 years old; differences will trigger full replacement of all years
78
+
p10: 0.05 # probability of a spot check for data 10+ years old; differences will trigger full replacement of all years
dropbox_home: temp/dropbox # This is a placeholder. In practice, this would be set to the actual path of the dropbox home directory on the user's system.
4
+
modeling_data: //cnrastore-bdo/Modeling_Data/
5
+
target_tz: "Etc/GMT+8" # Looks odd, but POSIX-style Etc/ zone names invert the sign: Etc/GMT+8 is UTC-8, i.e. fixed PST with no DST (as used by pandas).
6
+
target_tz_label: PST
7
+
8
+
9
+
data:
10
+
- name: ccf_ele
11
+
skip: False
12
+
collect:
13
+
file_pattern: "CC_Water_Levels_2024*.csv"
14
+
wildcard: time_overlap # time_sharded | time_overlap | data | none.
15
+
# If time_sharded, indicates non-overlapping timestamps that use year chunking.
16
+
# If time_overlap, indicates wildcard matches are ordered by date, e.g. 20250531 (lexicographic order must equal chronological order).
17
+
# If data, represents different station/variable. Look for metadata in the file and station lists.
18
+
# If none, a wildcard is an error. # todo: test
19
+
location: "${modeling_data}/hydraulic_structures/incoming" # This could be understood as a default
20
+
recursive_search: False
21
+
reader: read_ts # Names, pointers to code etc. To be fleshed out
22
+
reader_args:
23
+
names: ["datetime", "up_ele", "down_ele"]
24
+
na_values: ["(null)", "null", "NULL", ""]
25
+
hint: "resort"
26
+
force_regular: False
27
+
merge_method: "ts_splice" # We will not call read_ts on the wildcard, but rather glob, read, and create a list of dfs
28
+
merge_args:
29
+
transition: prefer_first
30
+
selector: up_ele # Usually a column name. When read_last_resort_csv is used, use null.
31
+
transforms:
32
+
- name: dst_tz # SCADA is delivered local time. Often this is not needed from better data sources
33
+
args:
34
+
src_tz: US/Pacific
35
+
target_tz: ${target_tz}
36
+
- name: coarsen
37
+
args:
38
+
grid: 2min
39
+
preserve_vals: [0.0]
40
+
qwidth: 0.01
41
+
hyst: 0.5
42
+
heartbeat_freq: 60min
43
+
metadata:
44
+
station_id: clc
45
+
structure_id: ccfb
46
+
agency_id: ccfb
47
+
processor: dms
48
+
agency: dwr
49
+
source: dwr
50
+
freq: None # None for irregular, "infer" for infer.
51
+
param: elevation
52
+
unit: ft
53
+
subloc: up
54
+
time_zone: ${target_tz}
55
+
time_zone_label: ${target_tz_label}
56
+
latitude: registry_lookup # projected will be inferred. User changes should be made to projected, not latitude/longitude.
57
+
longitude: registry_lookup # What happens when this and the registry entry have conflicting metadata for something like latitude?
58
+
# seems like this prevails, but we should prevent conflicts.
59
+
60
+
output:
61
+
repo_name: proprietary_formatted # This is a pointer to an entry in dstore_config.yaml
62
+
staging: # This is where the read and transformed time series will be staged.
63
+
# It will be used to update the repo, but that uses update_repo and may not mean a full replacement
repo_data_dir: ${modeling_data}/repo/continuous/proprietary/formatted # This is a practice directory so you don't go overwriting a real repo while developing the recipe
70
+
# When omitted the target location is the repo_root as configured in dstore_config.yaml
71
+
prefer: staged # This is an argument to the update process:
72
+
# - staged means "prefer the new stuff we staged"
73
+
# - repo means "prefer the stuff in the repo, just top off with any new time stamps"
74
+
allow_new_series: true
75
+
inspection: # update parameters. These should be included but seldom change
76
+
recent_years: 3 # Data up to 3 years old will be checked whenever an update is performed
77
+
p3: 0.15 # probability of a spot check for data 3-10 years old; differences will trigger full replacement of all years
78
+
p10: 0.05 # probability of a spot check for data 10+ years old; differences will trigger full replacement of all years
0 commit comments