@@ -239,6 +239,100 @@ class AddOp: public Op {
239239 return std::shared_ptr<Op>(new AddOp<T, LabeledTensorT1, LabeledTensorT2>{*this });
240240 }
241241
242+ void display_info () const override {
243+ auto lhs_tensor = lhs_.tensor ();
244+ auto rhs_tensor = rhs_.tensor ();
245+
246+ auto lhs_tis_vec = lhs_tensor.tiled_index_spaces ();
247+ auto rhs_tis_vec = rhs_tensor.tiled_index_spaces ();
248+ std::vector<size_t > dims_sizes_lhs;
249+ std::vector<size_t > dims_sizes_rhs;
250+
251+ for (const auto & tis: lhs_tis_vec) { dims_sizes_lhs.push_back (tis.max_num_indices ()); }
252+
253+ for (const auto & tis: rhs_tis_vec) { dims_sizes_rhs.push_back (tis.max_num_indices ()); }
254+
255+ std::vector<int > block_sizes_lhs;
256+ std::vector<int > block_sizes_rhs;
257+ int max_block_size = 0 ;
258+ LabelLoopNest loop_nest_lhs{lhs_.labels ()};
259+ LabelLoopNest loop_nest_rhs{rhs_.labels ()};
260+
261+ for (auto bid: loop_nest_lhs) {
262+ auto tranlated_bid = internal::translate_blockid (bid, lhs_);
263+ auto block_dims = lhs_tensor.block_dims (tranlated_bid);
264+ int block_size = 1 ;
265+ for (auto & bd: block_dims) { block_size *= bd; }
266+ if (block_size > max_block_size) { max_block_size = block_size; }
267+ block_sizes_lhs.push_back (block_size);
268+ }
269+
270+ for (auto bid: loop_nest_rhs) {
271+ auto tranlated_bid = internal::translate_blockid (bid, rhs_);
272+ auto block_dims = rhs_tensor.block_dims (tranlated_bid);
273+ int block_size = 1 ;
274+ for (auto & bd: block_dims) { block_size *= bd; }
275+ if (block_size > max_block_size) { max_block_size = block_size; }
276+ block_sizes_rhs.push_back (block_size);
277+ }
278+
279+ int total_size_lhs = 1 ;
280+ for (auto & d: dims_sizes_lhs) { total_size_lhs *= d; }
281+ int total_size_rhs = 1 ;
282+ for (auto & d: dims_sizes_rhs) { total_size_rhs *= d; }
283+
284+ IndexLabelVec merged_use_labels =
285+ internal::merge_vector<IndexLabelVec>(lhs_.labels (), rhs_.labels ());
286+ auto unique_entries_by_primary_labels =
287+ internal::unique_entries_by_primary_label (merged_use_labels);
288+
289+ int total_tasks = 1 ;
290+ for (auto & entry: unique_entries_by_primary_labels) {
291+ total_tasks *= entry.tiled_index_space ().num_tiles ();
292+ }
293+
294+ std::cout << " AddOp\n " ;
295+ std::cout << " \t LHS_Tensor sizes = " ;
296+ for (auto & d: dims_sizes_lhs) { std::cout << d << " " ; }
297+ std::cout << std::endl;
298+ std::cout << " \t RHS_Tensor sizes = " ;
299+ for (auto & d: dims_sizes_rhs) { std::cout << d << " " ; }
300+ std::cout << std::endl;
301+ std::cout << " \t Total LHS size = " << total_size_lhs << std::endl;
302+ std::cout << " \t Total RHS size = " << total_size_rhs << std::endl;
303+
304+ LabelLoopNest loop_nest{merged_use_labels};
305+
306+ std::cout << " \t Max block size = " << max_block_size << std::endl;
307+ std::cout << " \t Number of total tasks = " << total_tasks << std::endl;
308+
309+ int task_id = 0 ;
310+
311+ for (const auto & blockid: loop_nest) {
312+ IndexVector cblockid (lhs_.labels ().size ());
313+ IndexVector ablockid (rhs_.labels ().size ());
314+
315+ std::copy (blockid.begin (), blockid.begin () + lhs_.labels ().size (), cblockid.begin ());
316+ std::copy (blockid.begin () + lhs_.labels ().size (), blockid.end (), ablockid.begin ());
317+
318+ const auto translated_cblockid = internal::translate_blockid (cblockid, lhs_);
319+ const auto translated_ablockid = internal::translate_blockid (ablockid, rhs_);
320+
321+ auto lhs_dims = lhs_.tensor ().block_dims (translated_cblockid);
322+ auto rhs_dims = rhs_.tensor ().block_dims (translated_ablockid);
323+
324+ int lhs_block_size = 1 ;
325+ for (auto & bd: lhs_dims) { lhs_block_size *= bd; }
326+ int rhs_block_size = 1 ;
327+ for (auto & bd: rhs_dims) { rhs_block_size *= bd; }
328+
329+ std::cout << " \t Task " << task_id << std::endl;
330+ std::cout << " \t LHS block size = " << lhs_block_size << std::endl;
331+ std::cout << " \t RHS block size = " << rhs_block_size << std::endl;
332+ task_id++;
333+ }
334+ }
335+
242336 void execute (ExecutionContext& ec, ExecutionHW hw = ExecutionHW::CPU) override {
243337 EXPECTS (lhs_.tensor ().execution_context () != nullptr );
244338
0 commit comments