Skip to content

Commit 35e7fdb

Browse files
fmt
1 parent 48746d8 commit 35e7fdb

4 files changed

Lines changed: 69 additions & 48 deletions

File tree

src/parser.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::{
3030
reader::Reader,
3131
serde::de::invalid_type_number,
3232
util::{
33-
arch::{get_nonspace_bits, prefix_xor},
33+
arch::prefix_xor,
3434
string::*,
3535
unicode::{codepoint_to_utf8, hex_to_u32_nocheck},
3636
},
@@ -252,7 +252,7 @@ impl SpaceSkipper {
252252
// then we use simd to accelerate skipping space
253253
while let Some(chunk) = reader.peek_n(64) {
254254
let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
255-
let bitmap = unsafe { get_nonspace_bits(chunk) };
255+
let bitmap = unsafe { crate::util::arch::get_nonspace_bits(chunk) };
256256
if bitmap != 0 {
257257
self.nospace_bits = bitmap;
258258
self.nospace_start = reader.index() as isize;
@@ -288,7 +288,7 @@ impl SpaceSkipper {
288288
// then we use simd to accelerate skipping space
289289
while let Some(chunk) = reader.peek_n(16) {
290290
let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 16]) };
291-
let bitmap = unsafe { get_nonspace_bits(chunk) };
291+
let bitmap = unsafe { crate::util::arch::get_nonspace_bits(chunk) };
292292
if bitmap != 0 {
293293
let cnt = bitmap.trailing_zeros() as usize;
294294
let ch = chunk[cnt];

src/util/arch/aarch64.rs

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
4343
// just for minification (or just to identify the structural characters),
4444
// there is a small untaken optimization opportunity here. We deliberately
4545
// do not pick it up.
46-
#[cfg(not(target_feature = "sve2"))]
4746
#[inline(always)]
4847
pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 {
4948
use std::arch::aarch64::*;
@@ -75,39 +74,3 @@ pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 {
7574
chunk_nonspace_bits(vld1q_u8(data.as_ptr().offset(48))),
7675
)
7776
}
78-
79-
#[cfg(target_feature = "sve2")]
80-
#[inline(always)]
81-
pub unsafe fn get_nonspace_bits(data: &[u8; 16]) -> u64 {
82-
let mut index: u64;
83-
// 空白符集合: 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
84-
let tokens: u32 = 0x090a0d20;
85-
86-
core::arch::asm!(
87-
"ptrue p0.b, vl16",
88-
"ld1b {{z0.b}}, p0/z, [{ptr}]",
89-
"mov z1.s, {t:w}", // 广播 4 个空白符到 z1
90-
91-
// nmatch 寻找不属于 {09, 0a, 0d, 20} 的字符
92-
// 结果存入 p1,p1 中 true 的位置表示“非空白符”
93-
"nmatch p1.b, p0/z, z0.b, z1.b",
94-
95-
// 定位第一个非空白符的位置
96-
"brkb p1.b, p0/z, p1.b", // 截断,只保留第一个 true 之前的位为 true
97-
"cntp {idx}, p0, p1.b", // 统计数量,得到第一个非空白符的 index
98-
99-
ptr = in(reg) data.as_ptr(),
100-
t = in(reg) tokens,
101-
idx = out(reg) index,
102-
out("z0") _, out("z1") _,
103-
out("p0") _, out("p1") _,
104-
);
105-
106-
// 如果 index < 16,返回 1 << index,使外部 trailing_zeros() 拿到正确偏移
107-
// 如果 index == 16,返回 0,触发外部 skip_space 的“全空白”跳过逻辑
108-
if index < 16 {
109-
1u64 << index
110-
} else {
111-
0
112-
}
113-
}

src/util/arch/mod.rs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ cfg_if::cfg_if! {
22
if #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq", target_feature = "avx2", target_feature = "sse2"))] {
33
mod x86_64;
44
pub use x86_64::*;
5+
} else if #[cfg(all(target_feature="sve2", target_arch="aarch64"))] {
6+
mod sve2;
7+
pub use sve2::*;
58
} else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
69
mod aarch64;
710
pub use aarch64::*;
@@ -13,13 +16,22 @@ cfg_if::cfg_if! {
1316

1417
#[cfg(test)]
1518
mod test {
16-
// use super::*;
19+
use super::*;
1720

18-
// #[test]
19-
// fn test_get_non_space_bits() {
20-
// let input = b"\t\r\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
21-
// let non_space_bits = unsafe { get_nonspace_bits(input) };
22-
// let expected_bits = 0b1111111111111111111111111111111111111111111111111111111111110000;
23-
// assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}");
24-
// }
21+
// Feeds `get_nonspace_bits` four whitespace bytes (tab, CR, LF, space)
// followed by `x` bytes and checks the bits it reports.
#[test]
fn test_get_non_space_bits() {
    let input = b"\t\r\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    cfg_if::cfg_if! {
        if #[cfg(all(target_feature="sve2", target_arch="aarch64"))] {
            // The sve2 implementation takes a 16-byte chunk. Borrow the first
            // 16 bytes through a checked conversion instead of transmuting the
            // reference, so `unsafe` only covers the call under test.
            let head = <&[u8; 16] as std::convert::TryFrom<&[u8]>>::try_from(&input[..16])
                .expect("input holds at least 16 bytes");
            let non_space_bits = unsafe { get_nonspace_bits(head) };
            // sve2 cannot generate the full bitmap (without performance loss):
            // only the bit of the first non-space byte (index 4) is set.
            let expected_bits = 0b10000;
            assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}");
        } else {
            // Full bitmap: the four lowest bits (the whitespace bytes) are
            // clear, every other bit is set.
            let non_space_bits = unsafe { get_nonspace_bits(input) };
            let expected_bits = 0b1111111111111111111111111111111111111111111111111111111111110000;
            assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}");
        }
    }
}
2537
}

src/util/arch/sve2.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/// Computes the prefix XOR of `bitmask`.
///
/// Bit `i` of the result is the XOR of bits `0..=i` of the input. The value
/// is built with six shift-and-XOR doubling steps (1, 2, 4, 8, 16, 32 bits).
///
/// # Safety
///
/// The body performs no unsafe operation. The function stays `unsafe` so its
/// signature matches the `prefix_xor` of the other architecture modules
/// (for example `aarch64`), which callers use interchangeably.
#[inline(always)]
pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
    let mut acc = bitmask;
    // After the step with shift `s`, each bit holds the XOR of the `2 * s`
    // input bits ending at its own position.
    acc ^= acc << 1;
    acc ^= acc << 2;
    acc ^= acc << 4;
    acc ^= acc << 8;
    acc ^= acc << 16;
    acc ^= acc << 32;
    acc
}
11+
12+
/// SVE2 implementation of `get_nonspace_bits`.
13+
/// But this won't get the full bitmap
14+
#[inline(always)]
15+
pub unsafe fn get_nonspace_bits(data: &[u8; 16]) -> u64 {
16+
let mut index: u64;
17+
// 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
18+
let tokens: u32 = 0x090a0d20;
19+
20+
core::arch::asm!(
21+
"ptrue p0.b, vl16",
22+
"ld1b {{z0.b}}, p0/z, [{ptr}]",
23+
// broadcast token set
24+
"mov z1.s, {t:w}",
25+
26+
// nmatch: find token does not match
27+
"nmatch p1.b, p0/z, z0.b, z1.b",
28+
29+
// locate
30+
"brkb p1.b, p0/z, p1.b",
31+
// count number of true bits
32+
"cntp {idx}, p0, p1.b",
33+
34+
ptr = in(reg) data.as_ptr(),
35+
t = in(reg) tokens,
36+
idx = out(reg) index,
37+
out("z0") _, out("z1") _,
38+
out("p0") _, out("p1") _,
39+
);
40+
41+
if index < 16 {
42+
1u64 << index
43+
} else {
44+
0
45+
}
46+
}

0 commit comments

Comments
 (0)