@@ -5779,26 +5779,17 @@ fn main(
57795779 if (chunk_hi < slice_hi) {
57805780 next_chunk_bucket = entry_bucket_id[chunk_hi];
57815781 }
5782- // PER_THREAD_ENTRIES can exceed 32 (iter 9: 64), so the emit/pair
5783- // flag bitmasks split into low (off ∈ [0, 32)) and high (off ∈ [32, 64))
5784- // u32s. A single u32 would silently overflow on \`1u << off\` for off >= 32.
57855782 var local_emit: u32 = 0u;
57865783 var local_pair: u32 = 0u;
5787- var local_emit_mask_lo: u32 = 0u;
5788- var local_emit_mask_hi: u32 = 0u;
5789- var local_pair_mask_lo: u32 = 0u;
5790- var local_pair_mask_hi: u32 = 0u;
5784+ var local_emit_mask: u32 = 0u;
5785+ var local_pair_mask: u32 = 0u;
57915786 for (var off: u32 = 0u; off < PER_THREAD_ENTRIES; off = off + 1u) {
57925787 let e = chunk_lo + off;
57935788 if (e >= chunk_hi) { continue; }
57945789 let p = e - local_break_pos[off];
57955790 if ((p & 1u) != 0u) { continue; }
57965791 local_emit = local_emit + 1u;
5797- if (off < 32u) {
5798- local_emit_mask_lo = local_emit_mask_lo | (1u << off);
5799- } else {
5800- local_emit_mask_hi = local_emit_mask_hi | (1u << (off - 32u));
5801- }
5792+ local_emit_mask = local_emit_mask | (1u << off);
58025793 var next_b: u32 = UNPAIRED_SENTINEL;
58035794 if (off + 1u < PER_THREAD_ENTRIES) {
58045795 if (e + 1u < chunk_hi) {
@@ -5811,11 +5802,7 @@ fn main(
58115802 }
58125803 if (next_b == local_buckets[off]) {
58135804 local_pair = local_pair + 1u;
5814- if (off < 32u) {
5815- local_pair_mask_lo = local_pair_mask_lo | (1u << off);
5816- } else {
5817- local_pair_mask_hi = local_pair_mask_hi | (1u << (off - 32u));
5818- }
5805+ local_pair_mask = local_pair_mask | (1u << off);
58195806 }
58205807 }
58215808
@@ -5851,21 +5838,12 @@ fn main(
58515838 var raw_w: u32 = raw_base;
58525839 var pair_w: u32 = pair_base;
58535840 for (var off: u32 = 0u; off < PER_THREAD_ENTRIES; off = off + 1u) {
5854- var emit_bit: u32;
5855- var pair_bit: u32;
5856- if (off < 32u) {
5857- emit_bit = local_emit_mask_lo & (1u << off);
5858- pair_bit = local_pair_mask_lo & (1u << off);
5859- } else {
5860- emit_bit = local_emit_mask_hi & (1u << (off - 32u));
5861- pair_bit = local_pair_mask_hi & (1u << (off - 32u));
5862- }
5863- if (emit_bit == 0u) { continue; }
5841+ if ((local_emit_mask & (1u << off)) == 0u) { continue; }
58645842 let e = chunk_lo + off;
58655843 let raw = raw_w;
58665844 raw_w = raw_w + 1u;
58675845 meta_pool[pair_idx_a_base + raw] = e;
5868- if (pair_bit != 0u) {
5846+ if ((local_pair_mask & (1u << off)) != 0u) {
58695847 meta_pool[pair_idx_b_base + raw] = e + 1u;
58705848 let pair_rank = pair_w;
58715849 pair_w = pair_w + 1u;
@@ -5881,19 +5859,10 @@ fn main(
58815859 var raw_r: u32 = raw_base;
58825860 var pair_r: u32 = pair_base;
58835861 for (var off: u32 = 0u; off < PER_THREAD_ENTRIES; off = off + 1u) {
5884- var emit_bit: u32;
5885- var pair_bit: u32;
5886- if (off < 32u) {
5887- emit_bit = local_emit_mask_lo & (1u << off);
5888- pair_bit = local_pair_mask_lo & (1u << off);
5889- } else {
5890- emit_bit = local_emit_mask_hi & (1u << (off - 32u));
5891- pair_bit = local_pair_mask_hi & (1u << (off - 32u));
5892- }
5893- if (emit_bit == 0u) { continue; }
5862+ if ((local_emit_mask & (1u << off)) == 0u) { continue; }
58945863 let raw = raw_r;
58955864 raw_r = raw_r + 1u;
5896- if (pair_bit != 0u) {
5865+ if ((local_pair_mask & (1u << off)) != 0u) {
58975866 let pair_rank = pair_r;
58985867 pair_r = pair_r + 1u;
58995868 if (pair_rank == 0u) {
@@ -6030,26 +5999,17 @@ fn main(
60305999 if (chunk_hi < slice_hi) {
60316000 next_chunk_bucket = input_bucket_id[chunk_hi];
60326001 }
6033- // PER_THREAD_ENTRIES can exceed 32 (iter 9: 64), so the emit/pair
6034- // flag bitmasks split into low (off ∈ [0, 32)) and high (off ∈ [32, 64))
6035- // u32s. A single u32 would silently overflow on \`1u << off\` for off >= 32.
60366002 var local_emit: u32 = 0u;
60376003 var local_pair: u32 = 0u;
6038- var local_emit_mask_lo: u32 = 0u;
6039- var local_emit_mask_hi: u32 = 0u;
6040- var local_pair_mask_lo: u32 = 0u;
6041- var local_pair_mask_hi: u32 = 0u;
6004+ var local_emit_mask: u32 = 0u;
6005+ var local_pair_mask: u32 = 0u;
60426006 for (var off: u32 = 0u; off < PER_THREAD_ENTRIES; off = off + 1u) {
60436007 let e = chunk_lo + off;
60446008 if (e >= chunk_hi) { continue; }
60456009 let p = e - local_break_pos[off];
60466010 if ((p & 1u) != 0u) { continue; }
60476011 local_emit = local_emit + 1u;
6048- if (off < 32u) {
6049- local_emit_mask_lo = local_emit_mask_lo | (1u << off);
6050- } else {
6051- local_emit_mask_hi = local_emit_mask_hi | (1u << (off - 32u));
6052- }
6012+ local_emit_mask = local_emit_mask | (1u << off);
60536013 var next_b: u32 = UNPAIRED_SENTINEL;
60546014 if (off + 1u < PER_THREAD_ENTRIES) {
60556015 if (e + 1u < chunk_hi) {
@@ -6062,11 +6022,7 @@ fn main(
60626022 }
60636023 if (next_b == local_buckets[off]) {
60646024 local_pair = local_pair + 1u;
6065- if (off < 32u) {
6066- local_pair_mask_lo = local_pair_mask_lo | (1u << off);
6067- } else {
6068- local_pair_mask_hi = local_pair_mask_hi | (1u << (off - 32u));
6069- }
6025+ local_pair_mask = local_pair_mask | (1u << off);
60706026 }
60716027 }
60726028
@@ -6102,21 +6058,12 @@ fn main(
61026058 var raw_w: u32 = raw_base;
61036059 var pair_w: u32 = pair_base;
61046060 for (var off: u32 = 0u; off < PER_THREAD_ENTRIES; off = off + 1u) {
6105- var emit_bit: u32;
6106- var pair_bit: u32;
6107- if (off < 32u) {
6108- emit_bit = local_emit_mask_lo & (1u << off);
6109- pair_bit = local_pair_mask_lo & (1u << off);
6110- } else {
6111- emit_bit = local_emit_mask_hi & (1u << (off - 32u));
6112- pair_bit = local_pair_mask_hi & (1u << (off - 32u));
6113- }
6114- if (emit_bit == 0u) { continue; }
6061+ if ((local_emit_mask & (1u << off)) == 0u) { continue; }
61156062 let e = chunk_lo + off;
61166063 let raw = raw_w;
61176064 raw_w = raw_w + 1u;
61186065 meta_pool[pair_idx_a_base + raw] = e;
6119- if (pair_bit != 0u) {
6066+ if ((local_pair_mask & (1u << off)) != 0u) {
61206067 meta_pool[pair_idx_b_base + raw] = e + 1u;
61216068 let pair_rank = pair_w;
61226069 pair_w = pair_w + 1u;
@@ -6132,19 +6079,10 @@ fn main(
61326079 var raw_r: u32 = raw_base;
61336080 var pair_r: u32 = pair_base;
61346081 for (var off: u32 = 0u; off < PER_THREAD_ENTRIES; off = off + 1u) {
6135- var emit_bit: u32;
6136- var pair_bit: u32;
6137- if (off < 32u) {
6138- emit_bit = local_emit_mask_lo & (1u << off);
6139- pair_bit = local_pair_mask_lo & (1u << off);
6140- } else {
6141- emit_bit = local_emit_mask_hi & (1u << (off - 32u));
6142- pair_bit = local_pair_mask_hi & (1u << (off - 32u));
6143- }
6144- if (emit_bit == 0u) { continue; }
6082+ if ((local_emit_mask & (1u << off)) == 0u) { continue; }
61456083 let raw = raw_r;
61466084 raw_r = raw_r + 1u;
6147- if (pair_bit != 0u) {
6085+ if ((local_pair_mask & (1u << off)) != 0u) {
61486086 let pair_rank = pair_r;
61496087 pair_r = pair_r + 1u;
61506088 if (pair_rank == 0u) {
0 commit comments