@@ -311,7 +311,7 @@ Result<void> MdioToSegy(
311311
312312 // 1 GiB cache
313313 auto cacheJson = nlohmann::json::parse (
314- R"( {"cache_pool": {"total_bytes_limit": 1073741824 }})" );
314+ R"( {"cache_pool": {"total_bytes_limit": 5000000000 }})" );
315315 auto ctxSpec = Context::Spec::FromJson (cacheJson);
316316 auto ctx = Context (ctxSpec.value ());
317317
@@ -367,6 +367,20 @@ Result<void> MdioToSegy(
367367 num_traces *= shape[i];
368368 }
369369
370+ // Use the domain size of the second-fastest growing dimension for chunking
371+ // This ensures we process complete "slices" along that dimension
372+ Index output_chunk_size = 1 ;
373+ if (rank >= 2 ) {
374+ // Use the full domain size of the second-fastest growing spatial dimension
375+ output_chunk_size = shape[rank - 2 ];
376+ } else if (rank >= 1 ) {
377+ // Fallback to first spatial dimension size
378+ output_chunk_size = shape[0 ];
379+ }
380+
381+ std::cout << " Using output chunk size: " << output_chunk_size << " (full domain of dimension "
382+ << (rank >= 2 ? rank - 2 : 0 ) << " )" << std::endl;
383+
370384 // Build Zarr spec for output SEG-Y
371385 nlohmann::json spec;
372386 spec[" driver" ] = " zarr" ;
@@ -399,7 +413,7 @@ Result<void> MdioToSegy(
399413 spec[" metadata" ] = {
400414 {" dtype" , std::string (" |V" ) + std::to_string (trace_bytes)},
401415 {" shape" , {num_traces}},
402- {" chunks" , {1 }},
416+ {" chunks" , {output_chunk_size }}, // Use the computed chunk size instead of 1
403417 {" dimension_separator" , " ." },
404418 {" compressor" , nullptr },
405419 {" fill_value" , nullptr },
@@ -411,79 +425,148 @@ Result<void> MdioToSegy(
411425
412426 MDIO_ASSIGN_OR_RETURN (
413427 auto out_var,
414- Variable<>::Open (spec, constants::kCreateClean ).result ());
415-
416- // prepare an (N-1)-length odometer for spatial dims
417- std::vector<Index> coords (rank - 1 , 0 );
418- std::vector<RangeDescriptor<Index>> descs;
419- descs.reserve (rank);
420-
421- // flat loop over all traces
422- for (Index trace_idx = 0 ; trace_idx < num_traces; ++trace_idx) {
423- // Progress bar (tqdm style)
424- double progress = static_cast <double >(trace_idx) / static_cast <double >(num_traces);
425- int bar_width = 50 ;
426- int pos = static_cast <int >(bar_width * progress);
427-
428- std::cout << " \r [" ;
429- for (int i = 0 ; i < bar_width; ++i) {
430- if (i < pos) std::cout << " =" ;
431- else if (i == pos) std::cout << " >" ;
432- else std::cout << " " ;
428+ Variable<>::Open (spec, constants::kCreateClean , ctx).result ());
429+
430+ // Process traces by chunking along the second-fastest growing dimension
431+ // This ensures we align with the underlying storage chunks
432+ size_t chunk_dim = (rank >= 2 ) ? rank - 2 : 0 ; // Index of second-fastest growing spatial dim
433+ Index chunk_dim_size = shape[chunk_dim];
434+
435+ // Calculate total number of "slices" in all other spatial dimensions
436+ Index num_outer_slices = 1 ;
437+ for (size_t d = 0 ; d < rank - 1 ; ++d) {
438+ if (d != chunk_dim) {
439+ num_outer_slices *= shape[d];
433440 }
434- std::cout << " ] " << static_cast <int >(progress * 100.0 ) << " % "
435- << " (" << trace_idx << " /" << num_traces << " traces)" ;
436- std::cout.flush ();
437-
438- // build slice for dims 0..N-2
439- descs.clear ();
440- for (size_t d = 0 ; d < rank - 1 ; ++d) {
441- descs.push_back (RangeDescriptor<Index>{
442- labels[d],
443- coords[d],
444- coords[d] + 1 ,
445- 1 });
441+ }
442+
443+ Index traces_processed = 0 ;
444+
445+ // Iterate through all combinations of the other spatial dimensions
446+ std::vector<Index> outer_coords (rank - 1 , 0 );
447+
448+ for (Index outer_slice = 0 ; outer_slice < num_outer_slices; ++outer_slice) {
449+ // Convert outer_slice to coordinates for all dimensions except chunk_dim
450+ Index temp_slice = outer_slice;
451+ for (int d = static_cast <int >(rank) - 2 ; d >= 0 ; --d) {
452+ if (static_cast <size_t >(d) != chunk_dim) {
453+ outer_coords[d] = temp_slice % shape[d];
454+ temp_slice /= shape[d];
455+ }
446456 }
447- // full slice on final dimension
448- descs.push_back (RangeDescriptor<Index>{
449- labels[rank - 1 ],
450- 0 ,
451- num_samples,
452- 1 });
453-
454- // read the full trace (header + samples)
455- MDIO_ASSIGN_OR_RETURN (auto trace_var, seismic_var.slice (descs));
456- MDIO_ASSIGN_OR_RETURN (auto trace_data, trace_var.Read ().result ());
457457
458- const char * in_ptr = reinterpret_cast <const char *>(
459- trace_data.get_data_accessor ().data ());
460- ptrdiff_t in_off = trace_data.get_flattened_offset ();
461-
462- // slice+read one output slot
463- RangeDescriptor<Index> write_desc{" trace" , trace_idx, trace_idx + 1 , 1 };
464- MDIO_ASSIGN_OR_RETURN (auto out_slice, out_var.slice (write_desc));
465- MDIO_ASSIGN_OR_RETURN (auto out_data, out_slice.Read ().result ());
466-
467- char * out_ptr = reinterpret_cast <char *>(
468- out_data.get_data_accessor ().data ());
469- ptrdiff_t out_off = out_data.get_flattened_offset ();
470-
471- // zero header+data, then copy samples after 240-byte header
472- std::memset (out_ptr + out_off, 0 , trace_bytes);
473- std::memcpy (
474- out_ptr + out_off + 240 ,
475- in_ptr + (in_off * sample_bytes), // Convert element offset to byte offset
476- static_cast <size_t >(num_samples) * sample_bytes);
477-
478- // write it back
479- auto fut = out_slice.Write (out_data);
480- if (!fut.status ().ok ()) return fut.status ();
458+ // Now chunk along the chunk_dim
459+ for (Index chunk_start = 0 ; chunk_start < chunk_dim_size; chunk_start += output_chunk_size) {
460+ Index chunk_end = std::min (chunk_start + output_chunk_size, chunk_dim_size);
461+ Index chunk_size = chunk_end - chunk_start;
462+
463+ // Progress bar (tqdm style)
464+ double progress = static_cast <double >(traces_processed) / static_cast <double >(num_traces);
465+ int bar_width = 50 ;
466+ int pos = static_cast <int >(bar_width * progress);
467+
468+ // std::cout << "\r[";
469+ // for (int i = 0; i < bar_width; ++i) {
470+ // if (i < pos) std::cout << "=";
471+ // else if (i == pos) std::cout << ">";
472+ // else std::cout << " ";
473+ // }
474+ // std::cout << "] " << static_cast<int>(progress * 100.0) << "% "
475+ // << "(" << traces_processed << "/" << num_traces << " traces)";
476+ // std::cout.flush();
477+
478+ // Build slice descriptors for this specific chunk
479+ std::vector<RangeDescriptor<Index>> chunk_descs;
480+ chunk_descs.reserve (rank);
481+
482+ // For each spatial dimension, either use the fixed coordinate or the chunk range
483+ for (size_t d = 0 ; d < rank - 1 ; ++d) {
484+ if (d == chunk_dim) {
485+ // This is the dimension we're chunking along
486+ chunk_descs.push_back (RangeDescriptor<Index>{
487+ labels[d],
488+ chunk_start,
489+ chunk_end,
490+ 1 });
491+ } else {
492+ // This is a fixed coordinate for this outer slice
493+ chunk_descs.push_back (RangeDescriptor<Index>{
494+ labels[d],
495+ outer_coords[d],
496+ outer_coords[d] + 1 ,
497+ 1 });
498+ }
499+ }
500+
501+ // Full slice on the last dimension (samples/time)
502+ chunk_descs.push_back (RangeDescriptor<Index>{
503+ labels[rank - 1 ],
504+ 0 ,
505+ num_samples,
506+ 1 });
481507
482- // increment odometer
483- for (int d = static_cast <int >(rank) - 2 ; d >= 0 ; --d) {
484- coords[d]++;
485- if (coords[d] < shape[d]) break ;
486- coords[d] = 0 ;
508+ // Read only the seismic data we need for this chunk
509+ MDIO_ASSIGN_OR_RETURN (auto chunk_var, seismic_var.slice (chunk_descs));
510+ std::cout << chunk_var << std::endl;
511+ MDIO_ASSIGN_OR_RETURN (auto chunk_data, chunk_var.Read ().result ());
512+ std::cout << " Finished reading chunk_var..." << std::endl;
513+
514+ const char * chunk_ptr = reinterpret_cast <const char *>(
515+ chunk_data.get_data_accessor ().data ());
516+ ptrdiff_t chunk_off = chunk_data.get_flattened_offset ();
517+
518+ // Allocate buffer for the output chunk
519+ std::vector<char > chunk_buffer (chunk_size * trace_bytes, 0 );
520+
521+ // Process each trace in the chunk
522+ for (Index i = 0 ; i < chunk_size; ++i) {
523+ // Calculate the offset in the chunk data for this trace
524+ Index trace_offset_in_chunk = i * num_samples;
525+
526+ // Copy trace data to chunk buffer (240 bytes header + samples)
527+ char * trace_buffer = chunk_buffer.data () + (i * trace_bytes);
528+ std::memset (trace_buffer, 0 , 240 ); // Zero the header
529+ std::memcpy (
530+ trace_buffer + 240 ,
531+ chunk_ptr + (chunk_off + trace_offset_in_chunk) * sample_bytes,
532+ static_cast <size_t >(num_samples) * sample_bytes);
533+ }
534+
535+ // Calculate the output trace range for this chunk
536+ // Convert coordinates back to linear trace indices
537+ Index output_start = 0 ;
538+ Index multiplier = 1 ;
539+
540+ // Build the linear index from coordinates
541+ std::vector<Index> trace_coords = outer_coords;
542+ trace_coords[chunk_dim] = chunk_start;
543+
544+ for (int d = static_cast <int >(rank) - 2 ; d >= 0 ; --d) {
545+ output_start += trace_coords[d] * multiplier;
546+ multiplier *= shape[d];
547+ }
548+
549+ Index output_end = output_start + chunk_size;
550+
551+ // Write the chunk to output
552+ RangeDescriptor<Index> write_desc{" trace" , output_start, output_end, 1 };
553+ MDIO_ASSIGN_OR_RETURN (auto out_slice, out_var.slice (write_desc));
554+ std::cout << out_slice << std::endl;
555+ MDIO_ASSIGN_OR_RETURN (auto out_data, out_slice.Read ().result ());
556+ std::cout << " Finished reading out_data..." << std::endl;
557+ char * out_ptr = reinterpret_cast <char *>(
558+ out_data.get_data_accessor ().data ());
559+ ptrdiff_t out_off = out_data.get_flattened_offset ();
560+
561+ // Copy the chunk buffer to output
562+ std::memcpy (out_ptr + out_off, chunk_buffer.data (), chunk_size * trace_bytes);
563+
564+ // Write it back
565+ std::cout << " Writing back to out_slice..." << std::endl;
566+ auto fut = out_slice.Write (out_data);
567+ if (!fut.status ().ok ()) return fut.status ();
568+ std::cout << " Finished writing back to out_slice..." << std::endl;
569+ traces_processed += chunk_size;
487570 }
488571 }
489572
0 commit comments