Skip to content

Commit 50b87ef

Browse files
committed
Framework for compile-time polymorphism.
In the exascale_api, allow pixel values to be calculated either on the large array (all pixels) or, with low memory use, on just the whitelist of shoebox pixels. This commit only provides the polymorphism framework; both implementations are currently identical, giving the large-array behavior.
1 parent e173a6c commit 50b87ef

9 files changed

Lines changed: 781 additions & 737 deletions

File tree

simtbx/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33

44
def get_exascale(interface, context):
55
if context == "kokkos_gpu":
6-
from simtbx.kokkos import gpu_instance, gpu_energy_channels, gpu_detector, exascale_api
6+
from simtbx.kokkos import gpu_instance, gpu_energy_channels, gpu_detector, gpu_detector_small_whitelist
7+
from simtbx.kokkos import exascale_api, exascale_api_small_whitelist
78
elif context == "cuda":
89
from simtbx.gpu import gpu_instance, gpu_energy_channels, gpu_detector, exascale_api
910
else: raise NotImplementedError(context)
1011

1112
return dict(gpu_instance = gpu_instance, gpu_energy_channels = gpu_energy_channels,
12-
gpu_detector = gpu_detector, exascale_api = exascale_api)[interface]
13+
gpu_detector = gpu_detector, exascale_api = exascale_api,
14+
gpu_detector_small_whitelist = locals().get("gpu_detector_small_whitelist"),
15+
exascale_api_small_whitelist = locals().get("exascale_api_small_whitelist"))[interface]
1316

simtbx/kokkos/SConscript

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,10 @@ if not env_etc.no_boost_python:
168168
env_etc.include_registry.append(
169169
env=kokkos_ext_env,
170170
paths=env_etc.simtbx_common_includes + [env_etc.python_include])
171+
if True: # same construct as above, temporarily accommodate the eigen library
172+
env_etc.include_registry.append(
173+
env=kokkos_ext_env,
174+
paths=[env_etc.eigen_include])
171175
kokkos_ext_env.Replace(CXX=os.environ['CXX'])
172176
kokkos_ext_env.Replace(SHCXX=os.environ['CXX'])
173177
kokkos_ext_env.Replace(SHLINK=os.environ['CXX'])

simtbx/kokkos/detector.cpp

Lines changed: 0 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -92,168 +92,5 @@ namespace simtbx { namespace Kokkos {
9292
}
9393
}
9494

95-
vector_double_t
96-
kokkos_detector::construct_detail(dxtbx::model::Detector const & arg_detector) {
97-
//1) confirm the size
98-
SCITBX_ASSERT( m_panel_count == arg_detector.size() );
99-
SCITBX_ASSERT( m_panel_count >= 1 );
100-
101-
//2) confirm that array dimensions are similar for each size
102-
for (int ipanel=1; ipanel < arg_detector.size(); ++ipanel){
103-
SCITBX_ASSERT( arg_detector[ipanel].get_image_size()[1] == m_slow_dim_size );
104-
SCITBX_ASSERT( arg_detector[ipanel].get_image_size()[0] == m_fast_dim_size );
105-
}
106-
// printf(" m_total_pixel_count: %d\n", m_total_pixel_count);
107-
// printf(" m_slow_dim_size: %d\n", m_slow_dim_size);
108-
// printf(" m_fast_dim_size: %d\n", m_fast_dim_size);
109-
// printf(" m_panel_count: %d\n", m_panel_count);
110-
111-
//3) allocate a cuda array with these dimensions
112-
// separate accumulator image outside the usual nanoBragg data structure.
113-
// 1. accumulate contributions from a sequence of source energy channels computed separately
114-
// 2. represent multiple panels, all same rectangular shape; slowest dimension = n_panels
115-
vector_double_t view_floatimage( "m_accumulate_floatimage", m_total_pixel_count );
116-
return view_floatimage;
117-
};
118-
119-
kokkos_detector::kokkos_detector(int const& arg_device,
120-
dxtbx::model::Detector const & arg_detector,
121-
dxtbx::model::Beam const& arg_beam):
122-
h_deviceID(arg_device),
123-
metrology(arg_detector, arg_beam),
124-
m_panel_count( arg_detector.size() ),
125-
m_slow_dim_size( arg_detector[0].get_image_size()[1] ),
126-
m_fast_dim_size( arg_detector[0].get_image_size()[0] ),
127-
m_total_pixel_count( m_panel_count * m_slow_dim_size * m_fast_dim_size ),
128-
m_accumulate_floatimage( construct_detail(arg_detector) ) { }
129-
// Easy mistake: not realizing that the dxtbx detector model stores (fast,slow) sizes
130-
131-
kokkos_detector::kokkos_detector(int const& arg_device,
132-
const simtbx::nanoBragg::nanoBragg& nB):
133-
h_deviceID(arg_device),
134-
metrology(nB),
135-
m_panel_count(1),
136-
m_slow_dim_size(nB.spixels),
137-
m_fast_dim_size(nB.fpixels),
138-
m_total_pixel_count( m_panel_count * m_slow_dim_size * m_fast_dim_size ),
139-
m_accumulate_floatimage( vector_double_t( "m_accumulate_floatimage", m_total_pixel_count) ) { }
140-
141-
void
142-
kokkos_detector::scale_in_place(const double& factor){
143-
auto local_accumulate_floatimage = m_accumulate_floatimage;
144-
parallel_for("scale_in_place", range_policy(0,m_total_pixel_count), KOKKOS_LAMBDA (const int i) {
145-
local_accumulate_floatimage( i ) = local_accumulate_floatimage( i ) * factor;
146-
});
147-
}
148-
149-
void
150-
kokkos_detector::write_raw_pixels(simtbx::nanoBragg::nanoBragg& nB) {
151-
//only implement the monolithic detector case, one panel
152-
SCITBX_ASSERT(nB.spixels == m_slow_dim_size);
153-
SCITBX_ASSERT(nB.fpixels == m_fast_dim_size);
154-
SCITBX_ASSERT(m_panel_count == 1);
155-
// nB.raw_pixels = af::flex_double(af::flex_grid<>(nB.spixels,nB.fpixels));
156-
// do not reallocate CPU memory for the data write, as it is not needed
157-
158-
kokkostbx::transfer_kokkos2flex(nB.raw_pixels, m_accumulate_floatimage);
159-
// vector_double_t::HostMirror host_floatimage = create_mirror_view(m_accumulate_floatimage);
160-
// deep_copy(host_floatimage, m_accumulate_floatimage);
161-
162-
// printf(" m_total_pixel_count: %d\n", m_total_pixel_count);
163-
164-
// double * double_floatimage = nB.raw_pixels.begin();
165-
// for (int i=0; i<m_total_pixel_count; ++i) {
166-
// double_floatimage[i] = host_floatimage( i );
167-
// }
168-
}
169-
170-
af::flex_double
171-
kokkos_detector::get_raw_pixels(){
172-
//return the data array for the multipanel detector case
173-
af::flex_double output_array(af::flex_grid<>(m_panel_count,m_slow_dim_size,m_fast_dim_size), af::init_functor_null<double>());
174-
kokkostbx::transfer_kokkos2flex(output_array, m_accumulate_floatimage);
175-
176-
// vector_double_t::HostMirror host_floatimage = create_mirror_view(m_accumulate_floatimage);
177-
// deep_copy(host_floatimage, m_accumulate_floatimage);
178-
179-
// for (int i=0; i<m_total_pixel_count; ++i) {
180-
// output_array_ptr[ i ] = host_floatimage( i );
181-
// }
182-
return output_array;
183-
}
184-
185-
void
186-
kokkos_detector::set_active_pixels_on_GPU(af::shared<std::size_t> active_pixel_list_value) {
187-
m_active_pixel_size = active_pixel_list_value.size();
188-
kokkostbx::transfer_shared2kokkos(m_active_pixel_list, active_pixel_list_value);
189-
active_pixel_list = active_pixel_list_value;
190-
}
191-
192-
af::shared<double>
193-
kokkos_detector::get_whitelist_raw_pixels(af::shared<std::size_t> selection) {
194-
//return the data array for the multipanel detector case, but only for whitelist pixels
195-
vector_size_t active_pixel_selection = vector_size_t("active_pixel_selection", selection.size());
196-
kokkostbx::transfer_shared2kokkos(active_pixel_selection, selection);
197-
198-
size_t output_pixel_size = selection.size();
199-
vector_cudareal_t active_pixel_results = vector_cudareal_t("active_pixel_results", output_pixel_size);
200-
201-
auto temp = m_accumulate_floatimage;
202-
203-
parallel_for("get_active_pixel_selection",
204-
range_policy(0, output_pixel_size),
205-
KOKKOS_LAMBDA (const int i) {
206-
size_t index = active_pixel_selection( i );
207-
active_pixel_results( i ) = temp( index );
208-
});
209-
210-
af::shared<double> output_array(output_pixel_size, af::init_functor_null<double>());
211-
kokkostbx::transfer_kokkos2shared(output_array, active_pixel_results);
212-
213-
SCITBX_ASSERT(output_array.size() == output_pixel_size);
214-
return output_array;
215-
}
216-
217-
void
218-
kokkos_detector::each_image_allocate() {
219-
resize(m_rangemap, m_total_pixel_count);
220-
resize(m_omega_reduction, m_total_pixel_count);
221-
resize(m_max_I_x_reduction, m_total_pixel_count);
222-
resize(m_max_I_y_reduction, m_total_pixel_count);
223-
224-
resize(m_maskimage, m_total_pixel_count);
225-
resize(m_floatimage, m_total_pixel_count);
226-
227-
kokkostbx::transfer_shared2kokkos(m_sdet_vector, metrology.sdet);
228-
kokkostbx::transfer_shared2kokkos(m_fdet_vector, metrology.fdet);
229-
kokkostbx::transfer_shared2kokkos(m_odet_vector, metrology.odet);
230-
kokkostbx::transfer_shared2kokkos(m_pix0_vector, metrology.pix0);
231-
kokkostbx::transfer_shared2kokkos(m_distance, metrology.dists);
232-
kokkostbx::transfer_shared2kokkos(m_Xbeam, metrology.Xbeam);
233-
kokkostbx::transfer_shared2kokkos(m_Ybeam, metrology.Ybeam);
234-
fence();
235-
236-
// metrology.show();
237-
238-
// printf(" rangemap size:%d\n", m_rangemap.span());
239-
// printf(" omega_reduction size:%d\n", m_omega_reduction.span());
240-
// printf(" max_I_x_reduction size:%d\n", m_max_I_x_reduction.span());
241-
// printf(" max_I_y_reduction size:%d\n", m_max_I_y_reduction.span());
242-
// printf(" maskimage size:%d\n", m_maskimage.span());
243-
// printf(" floatimage size:%d\n", m_floatimage.span());
244-
// printf(" sdet_vector size:%d\n", m_sdet_vector.span());
245-
// printf(" fdet_vector size:%d\n", m_fdet_vector.span());
246-
// printf(" odet_vector size:%d\n", m_odet_vector.span());
247-
// printf(" pix0_vector size:%d\n", m_pix0_vector.span());
248-
// printf(" distance size:%d\n", m_distance.span());
249-
// printf(" Xbeam size:%d\n", m_Xbeam.span());
250-
// printf(" Ybeam size:%d\n", m_Ybeam.span());
251-
252-
// print_view(m_fdet_vector);
253-
// print_view(m_odet_vector, 1, 3);
254-
255-
// printf("DONE.\n");
256-
}
257-
25895
} // Kokkos
25996
} // simtbx

0 commit comments

Comments
 (0)