Skip to content

Commit e988c4d

Browse files
fmt
1 parent 48746d8 commit e988c4d

4 files changed

Lines changed: 71 additions & 58 deletions

File tree

src/parser.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::{
3030
reader::Reader,
3131
serde::de::invalid_type_number,
3232
util::{
33-
arch::{get_nonspace_bits, prefix_xor},
33+
arch::prefix_xor,
3434
string::*,
3535
unicode::{codepoint_to_utf8, hex_to_u32_nocheck},
3636
},
@@ -252,7 +252,7 @@ impl SpaceSkipper {
252252
// then we use simd to accelerate skipping space
253253
while let Some(chunk) = reader.peek_n(64) {
254254
let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 64]) };
255-
let bitmap = unsafe { get_nonspace_bits(chunk) };
255+
let bitmap = unsafe { crate::util::arch::get_nonspace_bits(chunk) };
256256
if bitmap != 0 {
257257
self.nospace_bits = bitmap;
258258
self.nospace_start = reader.index() as isize;
@@ -288,7 +288,7 @@ impl SpaceSkipper {
288288
// then we use simd to accelerate skipping space
289289
while let Some(chunk) = reader.peek_n(16) {
290290
let chunk = unsafe { &*(chunk.as_ptr() as *const [_; 16]) };
291-
let bitmap = unsafe { get_nonspace_bits(chunk) };
291+
let bitmap = unsafe { crate::util::arch::get_nonspace_bits(chunk) };
292292
if bitmap != 0 {
293293
let cnt = bitmap.trailing_zeros() as usize;
294294
let ch = chunk[cnt];

src/util/arch/aarch64.rs

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,7 @@
1717

1818
// PMULL instructions are not used here because they are apparently slow.
1919
// This is copied from simdjson.
20-
/// Returns the inclusive prefix XOR of `bitmask`: bit `i` of the result is the
/// XOR of bits `0..=i` of the input.
///
/// Six shift-and-XOR steps with doubling shift distances (1, 2, 4, 8, 16, 32)
/// cover all 64 bits using plain integer operations.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): presumably `unsafe` is kept so the signature matches the
/// other architecture back-ends — confirm.
pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
21-
let mut bitmask = bitmask;
22-
bitmask ^= bitmask << 1;
23-
bitmask ^= bitmask << 2;
24-
bitmask ^= bitmask << 4;
25-
bitmask ^= bitmask << 8;
26-
bitmask ^= bitmask << 16;
27-
bitmask ^= bitmask << 32;
28-
bitmask
29-
}
20+
3021

3122
// We compute whitespace and op separately. If the code later only uses one or the
3223
// other, given the fact that all functions are aggressively inlined, we can
@@ -43,7 +34,6 @@ pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
4334
// just for minification (or just to identify the structural characters),
4435
// there is a small untaken optimization opportunity here. We deliberately
4536
// do not pick it up.
46-
#[cfg(not(target_feature = "sve2"))]
4737
#[inline(always)]
4838
pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 {
4939
use std::arch::aarch64::*;
@@ -75,39 +65,3 @@ pub unsafe fn get_nonspace_bits(data: &[u8; 64]) -> u64 {
7565
chunk_nonspace_bits(vld1q_u8(data.as_ptr().offset(48))),
7666
)
7767
}
78-
79-
/// SVE2 variant of `get_nonspace_bits` operating on a 16-byte chunk.
///
/// The result has at most one bit set: bit `i`, where `i` is the index of the
/// first byte of `data` that is not a tab, LF, CR or space. Returns 0 when all
/// 16 bytes are whitespace.
///
/// # Safety
///
/// The inline assembly uses SVE/SVE2 instructions, so the executing CPU must
/// support SVE2.
#[cfg(target_feature = "sve2")]
80-
#[inline(always)]
81-
pub unsafe fn get_nonspace_bits(data: &[u8; 16]) -> u64 {
82-
let mut index: u64;
83-
// Whitespace set: 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
84-
let tokens: u32 = 0x090a0d20;
85-
86-
core::arch::asm!(
87-
"ptrue p0.b, vl16",
88-
"ld1b {{z0.b}}, p0/z, [{ptr}]",
89-
"mov z1.s, {t:w}", // broadcast the 4 whitespace bytes into z1
90-
91-
// nmatch finds the bytes that are not in {09, 0a, 0d, 20}
92-
// the result is stored in p1; true lanes in p1 mark "non-whitespace" bytes
93-
"nmatch p1.b, p0/z, z0.b, z1.b",
94-
95-
// locate the position of the first non-whitespace byte
96-
"brkb p1.b, p0/z, p1.b", // truncate: only the lanes before the first true stay true
97-
"cntp {idx}, p0, p1.b", // count them to get the index of the first non-whitespace byte
98-
99-
ptr = in(reg) data.as_ptr(),
100-
t = in(reg) tokens,
101-
idx = out(reg) index,
102-
out("z0") _, out("z1") _,
103-
out("p0") _, out("p1") _,
104-
);
105-
106-
// if index < 16, return 1 << index so the caller's trailing_zeros() yields the correct offset
107-
// if index == 16, return 0, which triggers the caller's "all whitespace" skip logic in skip_space
108-
if index < 16 {
109-
1u64 << index
110-
} else {
111-
0
112-
}
113-
}

src/util/arch/mod.rs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,20 @@ cfg_if::cfg_if! {
22
if #[cfg(all(target_arch = "x86_64", target_feature = "pclmulqdq", target_feature = "avx2", target_feature = "sse2"))] {
33
mod x86_64;
44
pub use x86_64::*;
5+
} else if #[cfg(all(target_feature="sve2", target_arch="aarch64"))] {
6+
mod sve2;
7+
pub use sve2::*;
8+
9+
/// Returns the inclusive prefix XOR of `bitmask`: bit `i` of the result is the
/// XOR of bits `0..=i` of the input.
///
/// Six shift-and-XOR steps with doubling shift distances (1, 2, 4, 8, 16, 32)
/// cover all 64 bits using plain integer operations.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): presumably `unsafe` is kept so the signature matches the
/// other architecture back-ends — confirm.
pub unsafe fn prefix_xor(bitmask: u64) -> u64 {
10+
let mut bitmask = bitmask;
11+
bitmask ^= bitmask << 1;
12+
bitmask ^= bitmask << 2;
13+
bitmask ^= bitmask << 4;
14+
bitmask ^= bitmask << 8;
15+
bitmask ^= bitmask << 16;
16+
bitmask ^= bitmask << 32;
17+
bitmask
18+
}
519
} else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
620
mod aarch64;
721
pub use aarch64::*;
@@ -13,13 +27,22 @@ cfg_if::cfg_if! {
1327

1428
// Unit tests for the architecture-specific helpers re-exported by this module.
#[cfg(test)]
1529
mod test {
16-
// use super::*;
30+
use super::*;
1731

18-
// #[test]
19-
// fn test_get_non_space_bits() {
20-
// let input = b"\t\r\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
21-
// let non_space_bits = unsafe { get_nonspace_bits(input) };
22-
// let expected_bits = 0b1111111111111111111111111111111111111111111111111111111111110000;
23-
// assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}");
24-
// }
32+
// Checks `get_nonspace_bits` on an input whose first four bytes are
// whitespace (tab, CR, LF, space) and whose remaining bytes are `x`.
#[test]
33+
fn test_get_non_space_bits() {
34+
let input = b"\t\r\n xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
35+
cfg_if::cfg_if! {
36+
if #[cfg(all(target_feature="sve2", target_arch="aarch64"))] {
37+
// The SVE2 variant takes `&[u8; 16]`, so the reference is reinterpreted
// and only the leading 16 bytes of `input` are examined.
let non_space_bits = unsafe { get_nonspace_bits(std::mem::transmute(input)) };
38+
// SVE2 cannot generate the full bitmap (without performance loss), so only
// the bit of the first non-space byte (index 4 here) is expected to be set.
39+
let expected_bits = 0b10000;
40+
assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}");
41+
} else {
42+
// Full bitmap: one bit per input byte, cleared for the four leading
// whitespace bytes and set for every `x`.
let non_space_bits = unsafe { get_nonspace_bits(input) };
43+
let expected_bits = 0b1111111111111111111111111111111111111111111111111111111111110000;
44+
assert_eq!(non_space_bits, expected_bits, "bits is {non_space_bits:b}");
45+
}
46+
}
47+
}
2548
}

src/util/arch/sve2.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
/// SVE2 implementation of `get_nonspace_bits`.
3+
/// This does not produce a full bitmap: the result has at most one bit set,
/// bit `i`, where `i` is the index of the first byte of `data` that is not a
/// tab, LF, CR or space. Returns 0 when all 16 bytes are whitespace.
///
/// # Safety
///
/// The inline assembly uses SVE/SVE2 instructions (`ptrue`, `ld1b`, `nmatch`,
/// `brkb`, `cntp`), so the executing CPU must support SVE2.
4+
#[inline(always)]
5+
pub unsafe fn get_nonspace_bits(data: &[u8; 16]) -> u64 {
6+
let mut index: u64;
7+
// Whitespace set packed one byte each: 0x09 (Tab), 0x0A (LF), 0x0D (CR), 0x20 (Space)
8+
let tokens: u32 = 0x090a0d20;
9+
10+
core::arch::asm!(
11+
// p0: predicate with the first 16 byte lanes active; load the 16 input bytes into z0
"ptrue p0.b, vl16",
12+
"ld1b {{z0.b}}, p0/z, [{ptr}]",
13+
// broadcast the 4-byte whitespace token set into every 32-bit lane of z1
14+
"mov z1.s, {t:w}",
15+
16+
// nmatch: p1 is true for each input byte that matches none of the whitespace tokens
17+
"nmatch p1.b, p0/z, z0.b, z1.b",
18+
19+
// brkb: keep true only the lanes before the first non-whitespace byte
20+
"brkb p1.b, p0/z, p1.b",
21+
// cntp: count the true lanes, i.e. the number of leading whitespace bytes
22+
"cntp {idx}, p0, p1.b",
23+
24+
ptr = in(reg) data.as_ptr(),
25+
t = in(reg) tokens,
26+
idx = out(reg) index,
27+
out("z0") _, out("z1") _,
28+
out("p0") _, out("p1") _,
29+
);
30+
31+
// index < 16: a non-whitespace byte exists; set only its bit.
// Otherwise all 16 bytes were whitespace; report an empty bitmap.
if index < 16 {
32+
1u64 << index
33+
} else {
34+
0
35+
}
36+
}

0 commit comments

Comments
 (0)