@@ -8,39 +8,39 @@ pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
88 bitmask ^= bitmask << 32 ;
99 bitmask
1010}
11-
12- /// SVE2 implementation of `get_nonspace_bits`.
13- /// But this won't get the full bitmap
11+ /// SVE2 implementation: Returns the index (0-15) of the first non-whitespace byte.
12+ /// Returns 16 if all 16 bytes are whitespace (tab, LF, CR, or space).
1413#[ inline( always) ]
15- pub unsafe fn get_nonspace_bits ( data : & [ u8 ; 16 ] ) -> u64 {
16- let mut index : u64 ;
17- // 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
14+ pub unsafe fn get_nonspace_index ( data : & [ u8 ; 16 ] ) -> usize {
15+ let mut idx : u64 = 16 ; // Default to 16 (Not Found)
16+ // 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
1817 let tokens: u32 = 0x090a0d20 ;
1918
2019 core:: arch:: asm!(
2120 "ptrue p0.b, vl16" ,
2221 "ld1b {{z0.b}}, p0/z, [{ptr}]" ,
23- // broadcast token set
2422 "mov z1.s, {t:w}" ,
2523
26- // nmatch: find token does not match
24+ // 1. Identify non-space characters
25+ // NMATCH sets the Z flag if NO non-spaces are found (all whitespace)
2726 "nmatch p1.b, p0/z, z0.b, z1.b" ,
2827
29- // locate
30- "brkb p1.b, p0/z, p1.b" ,
31- // count number of true bits
32- "cntp {idx}, p0, p1.b" ,
28+ // 2. Fast Path: Branch if NO non-space characters were found.
29+ // b.none checks the Z flag set by nmatch.
30+ // If Z=1 (all spaces), we skip the calculation and keep idx=16.
31+ "b.none 1f" ,
32+
33+ // 3. Slow Path (Found something): Calculate the exact index
34+ "brkb p2.b, p0/z, p1.b" , // p2 = lanes strictly *before* the first non-space; later lanes are cleared
35+ "cntp {idx}, p0, p2.b" , // Count the active lanes of p2 = index of the first non-space byte
3336
37+ "1:" ,
3438 ptr = in( reg) data. as_ptr( ) ,
3539 t = in( reg) tokens,
36- idx = out ( reg) index ,
40+ idx = inout ( reg) idx ,
3741 out( "z0" ) _, out( "z1" ) _,
38- out( "p0" ) _, out( "p1" ) _,
42+ out( "p0" ) _, out( "p1" ) _, out ( "p2" ) _ ,
3943 ) ;
4044
41- if index < 16 {
42- 1u64 << index
43- } else {
44- 0
45- }
45+ idx as usize
4646}
0 commit comments