Skip to content

Commit affcd9a

Browse files
authored
Merge pull request #57 from LSSTDESC/issue/47/cardinalreduce
Cardinal reducer
2 parents 5e0f7fa + 9dcf333 commit affcd9a

12 files changed

Lines changed: 1395 additions & 17 deletions

examples/cardinal_project.yaml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
Project:
2+
3+
Name: test_cardinal
4+
5+
# Include other configuration files
6+
Includes:
7+
- examples/cardinal_project_library.yaml
8+
9+
PathTemplates: {}
10+
11+
CommonPaths:
12+
root: /global/cfs/cdirs/lsst/groups/PZ/Cardinal
13+
scratch_root: "{root}"
14+
catalogs_dir: "{root}/parquet_files"
15+
project: test_cardinal
16+
sim_version: v1
17+
18+
# Baseline configuration, included in others by default
19+
Baseline:
20+
catalog_tag: cardinal
21+
pipelines: ['all']
22+
file_aliases: # Set the training and test files
23+
test: test_file_100k
24+
train: train_file_100k
25+
train_zCOSMOS: train_file_zCOSMOS_100k
26+
wide: wide_file_full
27+
deep: deep_file_full
28+
spec: spec_file_full
29+
30+
# These define the variant configurations for the various parts of the analysis
31+
Flavors:
32+
- Flavor:
33+
name: train_cosmos
34+
pipelines: ['pz', 'tomography']
35+
file_aliases: # Set the training and test files
36+
test: test_file_100k
37+
train: train_file_zCOSMOS_100k
38+
- Flavor:
39+
name: gpz_gl
40+
pipelines: ['pz'] # only run the pz pipeline
41+
pipeline_overrides: # Override specifics for particular pipelines
42+
default:
43+
kwargs:
44+
algorithms: ['gpz'] # Only run gpz
45+
inform:
46+
inform_gpz:
47+
gpz_method: GL
48+
49+
# These are variables that we iterate over when running over entire catalogs
50+
IterationVars:
51+
healpix:
52+
- 427
53+
- 428
54+
- 429
55+
- 430
56+
- 431
57+
- 432
58+
- 73
59+
- 122
60+
- 99
61+
- 16
62+
- 58
63+
- 277
64+
- 346
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
# This is the "library" of common definitions shared between similar projects
2+
3+
# Templates we use to define catalogs
4+
#
5+
# A CatalogTemplate can be given several interpolants to resolve to a specific Catalog
6+
#
7+
# Some of the interpolants [{catalogs_dir}, {project}, {sim_version}]
8+
# are defined in the Project CommonPaths block
9+
#
10+
# Possible values of {selection} are defined in the library "Selections" block
11+
#
12+
# Possible values of {flavor} are defined in the Project "Flavors" block
13+
Catalogs:
14+
# The input "Truth" catalog
15+
- CatalogTemplate:
16+
name: truth
17+
path_template: "{catalogs_dir}/Chinchilla-3-triofile.{healpix}.parquet"
18+
iteration_vars: ['healpix']
19+
# Catalogs that have been reduced using a particular selection
20+
- CatalogTemplate:
21+
name: reduced
22+
path_template: "{catalogs_dir}/{project}_{selection}/{healpix}/Chinchilla-3-triofile.pq"
23+
iteration_vars: ['healpix']
24+
# Data output from running some degraders on the original catalog
25+
# Note that the {basename} can be used to select which degraded output we want
26+
- CatalogTemplate:
27+
name: degraded
28+
path_template: "{catalogs_dir}/{project}_{selection}_{flavor}/{healpix}/{basename}"
29+
iteration_vars: ['healpix']
30+
31+
32+
# This file describes the templates we use to define files and specific instances
33+
# of files
34+
#
35+
# A FileTemplate can be given several interpolants to resolve to a specific FileInstance
36+
#
37+
# Some of the interpolants [{catalogs_dir}, {project}]
38+
# are defined in the Project CommonPaths block
39+
#
40+
# Possible values of {selection} are defined in the library "Selections" block
41+
Files:
42+
# Testing data file
43+
- FileTemplate:
44+
name: test_file_100k
45+
path_template: "{catalogs_dir}/test/{project}_{selection}_baseline_100k.hdf5"
46+
# Generic training data file
47+
- FileTemplate:
48+
name: train_file_100k
49+
path_template: "{catalogs_dir}/test/{project}_{selection}_baseline_100k.hdf5"
50+
- FileTemplate:
51+
name: train_file_200k
52+
path_template: "{catalogs_dir}/test/{project}_{selection}_baseline_200k.hdf5"
53+
- FileTemplate:
54+
name: train_file_10
55+
path_template: "{catalogs_dir}/test/{project}_{selection}_baseline_10.hdf5"
56+
- FileTemplate:
57+
name: test_split_file
58+
path_template: "{catalogs_dir}/test/{project}_{selection}_baseline_split_test_10.hdf5"
59+
- FileTemplate:
60+
name: train_split_file
61+
path_template: "{catalogs_dir}/train/{project}_{selection}_baseline_split_train_10.hdf5"
62+
- FileTemplate:
63+
name: deep_file_full
64+
path_template: "{catalogs_dir}/som/{project}_deep_data_full.hdf5"
65+
- FileTemplate:
66+
name: deep_file_small
67+
path_template: "{catalogs_dir}/som/{project}_deep_data_small.hdf5"
68+
- FileTemplate:
69+
name: wide_file_full
70+
path_template: "{catalogs_dir}/som/{project}_wide_data_full.hdf5"
71+
- FileTemplate:
72+
name: wide_file_small
73+
path_template: "{catalogs_dir}/som/{project}_wide_data_small.hdf5"
74+
- FileTemplate:
75+
name: spec_file_full
76+
path_template: "{catalogs_dir}/som/{project}_spec_data_full.hdf5"
77+
- FileTemplate:
78+
name: spec_file_small
79+
path_template: "{catalogs_dir}/som/{project}_spec_data_small.hdf5"
80+
81+
82+
# The next blocks list all available algorithms of various types and assign a short name to each of them
83+
#
84+
85+
# These describe all the algorithms that emulate spectroscopic selections
86+
SpecSelections:
87+
- SpecSelection:
88+
name: zCOSMOS
89+
Select: SpecSelection_zCOSMOS
90+
Module: rail.creation.degraders.spectroscopic_selections
91+
92+
93+
# These describe all the algorithms that estimate PZ
94+
PZAlgorithms:
95+
- PZAlgorithm:
96+
name: trainz
97+
Estimate: TrainZEstimator
98+
Inform: TrainZInformer
99+
Module: rail.estimation.algos.train_z
100+
- PZAlgorithm:
101+
name: simplenn
102+
Estimate: SklNeurNetEstimator
103+
Inform: SklNeurNetInformer
104+
Module: rail.estimation.algos.sklearn_neurnet
105+
- PZAlgorithm:
106+
name: fzboost
107+
Estimate: FlexZBoostEstimator
108+
Inform: FlexZBoostInformer
109+
Module: rail.estimation.algos.flexzboost
110+
- PZAlgorithm:
111+
name: knn
112+
Estimate: KNearNeighEstimator
113+
Inform: KNearNeighInformer
114+
Module: rail.estimation.algos.k_nearneigh
115+
- PZAlgorithm:
116+
name: gpz
117+
Estimate: GPzEstimator
118+
Inform: GPzInformer
119+
Module: rail.estimation.algos.gpz
120+
121+
122+
# These describe all the algorithms that classify objects into tomographic bins
123+
Classifiers:
124+
- Classifier:
125+
name: equal_count
126+
Classify: EqualCountClassifier
127+
Module: rail.estimation.algos.equal_count
128+
- Classifier:
129+
name: uniform_binning
130+
Classify: UniformBinningClassifier
131+
Module: rail.estimation.algos.uniform_binning
132+
133+
134+
# These describe the error models we use in the truth_to_observed pipeline
135+
ErrorModels:
136+
- ErrorModel:
137+
name: lsst
138+
ErrorModel: LSSTErrorModel
139+
Module: rail.creation.degraders.photometric_errors
140+
- ErrorModel:
141+
name: roman
142+
ErrorModel: RomanErrorModel
143+
Module: rail.creation.degraders.photometric_errors
144+
145+
146+
# These describe the ways we can sub-sample the data
147+
Subsamplers:
148+
- Subsampler:
149+
name: random_subsampler
150+
Subsample: RandomSubsampler
151+
Module: rail.projects.subsampler
152+
- Subsampler:
153+
name: multi_catalog_subsampler
154+
Subsample: MultiCatalogSubsample
155+
Module: rail.projects.subsampler
156+
157+
158+
# These describe the ways we can reduce the data
159+
Reducers:
160+
- Reducer:
161+
name: cardinal
162+
Reduce: CardinalReducer
163+
Module: rail.projects.reducer
164+
165+
166+
# These describe the various data analysis pipelines
167+
Pipelines:
168+
- PipelineTemplate:
169+
name: truth_to_observed
170+
pipeline_class: rail.pipelines.degradation.truth_to_observed.TruthToObservedPipeline
171+
input_catalog_template: reduced
172+
output_catalog_template: degraded
173+
kwargs:
174+
error_models: ['all']
175+
selectors: ['all']
176+
blending: true
177+
- PipelineTemplate:
178+
name: prepare
179+
pipeline_class: rail.pipelines.utils.prepare_observed.PrepareObservedPipeline
180+
input_catalog_template: reduced
181+
output_catalog_template: degraded
182+
- PipelineTemplate:
183+
name: photometric_errors
184+
pipeline_class: rail.pipelines.degradation.apply_phot_errors.ApplyPhotErrorsPipeline
185+
input_catalog_template: reduced
186+
output_catalog_template: degraded
187+
kwargs:
188+
error_models: ['all']
189+
- PipelineTemplate:
190+
name: spec_selection
191+
input_catalog_template: degraded
192+
output_catalog_template: degraded
193+
input_catalog_basename: output_dereddener_errors.pq
194+
pipeline_class: rail.pipelines.degradation.spectroscopic_selection_pipeline.SpectroscopicSelectionPipeline
195+
kwargs:
196+
selectors: ['all']
197+
- PipelineTemplate:
198+
name: inform
199+
pipeline_class: rail.pipelines.estimation.inform_all.InformPipeline
200+
input_catalog_template: degraded
201+
output_catalog_template: degraded
202+
input_file_templates:
203+
input:
204+
flavor: baseline
205+
tag: train
206+
kwargs:
207+
algorithms: ['all']
208+
- PipelineTemplate:
209+
name: estimate
210+
pipeline_class: rail.pipelines.estimation.estimate_all.EstimatePipeline
211+
input_catalog_template: degraded
212+
output_catalog_template: degraded
213+
input_file_templates:
214+
input:
215+
flavor: baseline
216+
tag: test
217+
kwargs:
218+
algorithms: ['all']
219+
- PipelineTemplate:
220+
name: evaluate
221+
pipeline_class: rail.pipelines.evaluation.evaluate_all.EvaluationPipeline
222+
input_catalog_template: degraded
223+
output_catalog_template: degraded
224+
input_file_templates:
225+
truth:
226+
flavor: baseline
227+
tag: test
228+
kwargs:
229+
algorithms: ['all']
230+
- PipelineTemplate:
231+
name: pz
232+
pipeline_class: rail.pipelines.estimation.pz_all.PzPipeline
233+
input_catalog_template: degraded
234+
output_catalog_template: degraded
235+
input_file_templates:
236+
input_train:
237+
flavor: baseline
238+
tag: train
239+
input_test:
240+
flavor: baseline
241+
tag: test
242+
kwargs:
243+
algorithms: ['all']
244+
# These describe the selections going from "truth" to "reduced" catalog
245+
246+
# These just all use different limiting i-band magnitudes
247+
Selections:
248+
- Selection:
249+
name: maglim_25.5
250+
cuts:
251+
maglim_i: [null, 25.5]
252+
- Selection:
253+
name: gold
254+
cuts:
255+
maglim_i: [null, 25.5]
256+
- Selection:
257+
name: blend
258+
cuts:
259+
maglim_i: [null, 26.0]
260+
- Selection:
261+
name: crap
262+
cuts:
263+
maglim_i: [null, 30.0]
264+
- Selection:
265+
name: all
266+
cuts:
267+
maglim_i: [null, null]
268+
269+
# These describe the subsampling used to make test and training datasets
270+
Subsamples:
271+
# Testing subsample with 100k events
272+
- Subsample:
273+
name: test_100k
274+
seed: 1234
275+
num_objects: 100000
276+
# Training subsample with 100k events, and a different seed to select the events
277+
- Subsample:
278+
name: train_100k
279+
seed: 4321
280+
num_objects: 100000
281+
# Training subsample with 200k events, and a different seed to select the events
282+
- Subsample:
283+
name: train_200k
284+
seed: 5555
285+
num_objects: 200000
286+
287+
# These describe the ways we can split the data
288+
Splitters:
289+
- Splitter:
290+
name: random_splitter
291+
Split: RandomSplitter
292+
Module: rail.projects.splitter

examples/rail_project_example.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,9 @@
215215
],
216216
"metadata": {
217217
"kernelspec": {
218-
"display_name": "Python 3 (ipykernel)",
218+
"display_name": "testenv",
219219
"language": "python",
220-
"name": "python3"
220+
"name": "testenv"
221221
},
222222
"language_info": {
223223
"codemirror_mode": {
@@ -229,7 +229,7 @@
229229
"name": "python",
230230
"nbconvert_exporter": "python",
231231
"pygments_lexer": "ipython3",
232-
"version": "3.11.4"
232+
"version": "3.12.12"
233233
}
234234
},
235235
"nbformat": 4,

0 commit comments

Comments
 (0)