Skip to content

Commit a75ce81

Browse files
replace get_nonspace_bits with get_nonspace_index on sve2
1 parent 1e411e0 commit a75ce81

3 files changed

Lines changed: 25 additions & 27 deletions

File tree

src/parser.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -288,12 +288,11 @@ impl SpaceSkipper {
288288
// then we use simd to accelerate skipping space
289289
while let Some(chunk) = reader.peek_n(16) {
290290
let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 16]) };
291-
let bitmap = unsafe { crate::util::arch::get_nonspace_bits(chunk) };
292-
if bitmap != 0 {
293-
let cnt = bitmap.trailing_zeros() as usize;
294-
let ch = chunk[cnt];
295-
reader.eat(cnt + 1);
291+
let cnt = unsafe { crate::util::arch::get_nonspace_index(chunk) };
296292

293+
if cnt < 16 {
294+
let ch = chunk[cnt];
295+
reader.eat(cnt + 1); // Skip spaces + return char
297296
return Some(ch);
298297
}
299298
reader.eat(16)

src/util/arch/mod.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@ mod test {
2323
let input = b"\t\r\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
2424
cfg_if::cfg_if! {
2525
if #[cfg(all(target_feature="sve2", target_arch="aarch64"))] {
26-
let non_space_bits = unsafe { get_nonspace_bits(std::mem::transmute(input)) };
26+
let first_nonspace_idx = unsafe { get_nonspace_index(std::mem::transmute(input)) };
2727
// SVE2 cannot generate the full bitmap (without performance loss), so only the first non-space position is checked
28-
let expected_bits = 0b10000;
29-
assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}");
28+
assert_eq!(first_nonspace_idx, 4, "first non-space index is {first_nonspace_idx}");
3029
} else {
3130
let non_space_bits = unsafe { get_nonspace_bits(input) };
3231
let expected_bits = 0b1111111111111111111111111111111111111111111111111111111111110000;

src/util/arch/sve2.rs

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,39 @@ pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
88
bitmask ^= bitmask << 32;
99
bitmask
1010
}
11-
12-
/// SVE2 implementation of `get_nonspace_bits`.
13-
/// But this won't get the full bitmap
11+
/// SVE2 implementation: returns the index (0-15) of the first non-whitespace byte in `data`.
12+
/// Returns 16 if all 16 bytes are whitespace (tab, LF, CR, or space).
1413
#[inline(always)]
15-
pub unsafe fn get_nonspace_bits(data: &[u8; 16]) -> u64 {
16-
let mut index: u64;
17-
// 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
14+
pub unsafe fn get_nonspace_index(data: &[u8; 16]) -> usize {
15+
let mut idx: u64 = 16; // Default to 16 (Not Found)
16+
// 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
1817
let tokens: u32 = 0x090a0d20;
1918

2019
core::arch::asm!(
2120
"ptrue p0.b, vl16",
2221
"ld1b {{z0.b}}, p0/z, [{ptr}]",
23-
// broadcast token set
2422
"mov z1.s, {t:w}",
2523

26-
// nmatch: find token does not match
24+
// 1. Identify non-space characters
25+
// NMATCH sets the Z flag if NO non-spaces are found (all whitespace)
2726
"nmatch p1.b, p0/z, z0.b, z1.b",
2827

29-
// locate
30-
"brkb p1.b, p0/z, p1.b",
31-
// count number of true bits
32-
"cntp {idx}, p0, p1.b",
28+
// 2. Fast Path: Branch if NO non-space characters were found.
29+
// b.none checks the Z flag set by nmatch.
30+
// If Z=1 (all spaces), we skip the calculation and keep idx=16.
31+
"b.none 1f",
32+
33+
// 3. Slow Path (Found something): Calculate the exact index
34+
"brkb p2.b, p0/z, p1.b", // Keep only the lanes *before* the first non-space byte
35+
"cntp {idx}, p0, p2.b", // Count the lanes left set by brkb, i.e. the index of the first non-space byte
3336

37+
"1:",
3438
ptr = in(reg) data.as_ptr(),
3539
t = in(reg) tokens,
36-
idx = out(reg) index,
40+
idx = inout(reg) idx,
3741
out("z0") _, out("z1") _,
38-
out("p0") _, out("p1") _,
42+
out("p0") _, out("p1") _, out("p2") _,
3943
);
4044

41-
if index < 16 {
42-
1u64 << index
43-
} else {
44-
0
45-
}
45+
idx as usize
4646
}

0 commit comments

Comments
 (0)