@@ -11,7 +11,6 @@ use std::{
1111use faststr:: FastStr ;
1212use serde:: de:: { self , Expected , Unexpected } ;
1313use sonic_number:: { parse_number, ParserNumber } ;
14- #[ cfg( all( target_feature = "neon" , target_arch = "aarch64" ) ) ]
1514// use sonic_simd::bits::NeonBits; // not used with unified u32 path
1615use sonic_simd:: { i8x32, m8x32, u8x32, u8x64, Mask , Simd } ;
1716
@@ -212,11 +211,108 @@ pub(crate) struct Pair<'de> {
212211 pub status : ParseStatus ,
213212}
214213
215- pub struct Parser < R > {
216- pub read : R ,
217- error_index : usize , // mark the error position
214+ /// Default bitmap-based whitespace skipper, compiled for every target except aarch64 with the sve2 target feature.
215+ /// It caches the non-space bitmap of the most recently scanned 64-byte chunk so that later calls landing inside the same chunk can reuse it instead of rescanning.
216+ #[ cfg( not( all( target_arch = "aarch64" , target_feature = "sve2" ) ) ) ]
217+ struct SpaceSkipper {
218218 nospace_bits : u64 , // cached result of get_nonspace_bits for one 64-byte chunk; the lowest set bit is used as the offset of the first non-whitespace byte
219219 nospace_start : isize , // reader index at which the cached chunk begins; kept signed so new() can start it at -128, outside any 64-byte reuse window
220+ }
221+
222+ #[ cfg( not( all( target_arch = "aarch64" , target_feature = "sve2" ) ) ) ]
223+ impl SpaceSkipper {
/// Creates a skipper with an empty cache.
/// nospace_bits is zero and nospace_start is -128, so for any non-negative reader index the
/// computed offset is at least 128 and the first call cannot take the bitmap-reuse path.
224+ pub fn new ( ) -> Self {
225+ Self {
226+ nospace_bits : 0 ,
227+ nospace_start : -128 ,
228+ }
229+ }
230+
/// Skips whitespace starting at the current reader index.
/// Returns Some(byte) for the first non-whitespace byte found, leaving the reader index
/// just past that byte, or None when the reader yields no further non-whitespace byte.
/// Three stages are tried in order: reuse of the cached bitmap, a 64-byte SIMD scan,
/// and a byte-by-byte loop for whatever the 64-byte scan could not cover.
231+ #[ inline( always) ]
232+ pub fn skip_space < ' de , R : Reader < ' de > > ( & mut self , reader : & mut R ) -> Option < u8 > {
233+ // fast path 2: reuse the cached bitmap for short keys or numbers (fast path 1, the per-byte check, is done by Parser::skip_space before it delegates here)
234+ let nospace_offset = ( reader. index ( ) as isize ) - self . nospace_start ;
// NOTE(review): only the upper bound is checked. The shift below assumes nospace_offset >= 0,
// i.e. that the reader index never moves before nospace_start - confirm against callers.
235+ if nospace_offset < 64 {
236+ let bitmap = {
// Clear the bits below the current offset so only bytes at or after the reader index remain.
237+ let mask = !( ( 1 << nospace_offset) - 1 ) ;
238+ self . nospace_bits & mask
239+ } ;
240+ if bitmap != 0 {
// The lowest remaining set bit is the next non-whitespace byte inside the cached chunk.
241+ let cnt = bitmap. trailing_zeros ( ) as usize ;
242+ let ch = reader. at ( self . nospace_start as usize + cnt) ;
243+ reader. set_index ( self . nospace_start as usize + cnt + 1 ) ;
244+
245+ return Some ( ch) ;
246+ } else {
247+ // no set bit remains, so the rest of the cached 64-byte window is whitespace: jump straight to its end.
248+ reader. set_index ( self . nospace_start as usize + 64 ) ;
249+ }
250+ }
251+
252+ // slow path: scan 64 bytes per iteration with SIMD and cache the first bitmap that has a set bit
253+ while let Some ( chunk) = reader. peek_n ( 64 ) {
// SAFETY (assumed): the cast reads 64 bytes through the slice pointer, so it relies on
// peek_n(64) returning a slice of at least 64 bytes - TODO confirm against Reader::peek_n.
254+ let chunk = unsafe { & * ( chunk. as_ptr ( ) as * const [ _ ; 64 ] ) } ;
// get_nonspace_bits is defined outside this view; its safety contract is not visible here.
255+ let bitmap = unsafe { get_nonspace_bits ( chunk) } ;
256+ if bitmap != 0 {
// Remember this chunk so that following calls can take the bitmap-reuse path above.
257+ self . nospace_bits = bitmap;
258+ self . nospace_start = reader. index ( ) as isize ;
259+ let cnt = bitmap. trailing_zeros ( ) as usize ;
260+ let ch = chunk[ cnt] ;
261+ reader. eat ( cnt + 1 ) ;
262+
263+ return Some ( ch) ;
264+ }
// Bitmap is zero: the whole chunk is treated as whitespace and consumed.
265+ reader. eat ( 64 )
266+ }
267+
// peek_n(64) returned None (presumably fewer than 64 bytes remain): finish one byte at a time.
268+ while let Some ( ch) = reader. next ( ) {
269+ if !is_whitespace ( ch) {
270+ return Some ( ch) ;
271+ }
272+ }
273+ None
274+ }
275+ }
276+
/// Whitespace skipper selected on aarch64 when the sve2 target feature is enabled.
/// It is a unit struct: this variant keeps no state between calls.
277+ #[ cfg( all( target_arch = "aarch64" , target_feature = "sve2" ) ) ]
278+ struct SpaceSkipper ;
279+
280+ #[ cfg( all( target_arch = "aarch64" , target_feature = "sve2" ) ) ]
281+ impl SpaceSkipper {
/// Creates the stateless skipper.
282+ pub fn new ( ) -> Self {
283+ Self
284+ }
285+
/// Skips whitespace starting at the current reader index.
/// Returns Some(byte) for the first non-whitespace byte found, leaving the reader just past
/// that byte, or None when the reader yields no further non-whitespace byte.
/// Same signature as the bitmap-based variant, so Parser::skip_space can call either one.
286+ #[ inline( always) ]
287+ pub fn skip_space < ' de , R : Reader < ' de > > ( & mut self , reader : & mut R ) -> Option < u8 > {
288+ // scan 16 bytes per iteration with SIMD; unlike the bitmap-based variant nothing is cached between calls
289+ while let Some ( chunk) = reader. peek_n ( 16 ) {
// SAFETY (assumed): the cast reads 16 bytes through the slice pointer, so it relies on
// peek_n(16) returning a slice of at least 16 bytes - TODO confirm against Reader::peek_n.
290+ let chunk = unsafe { & * ( chunk. as_ptr ( ) as * const [ _ ; 16 ] ) } ;
// get_nonspace_bits is defined outside this view; presumably a 16-byte variant exists for this cfg - verify.
291+ let bitmap = unsafe { get_nonspace_bits ( chunk) } ;
292+ if bitmap != 0 {
// The lowest set bit is used as the offset of the first non-whitespace byte in the chunk.
293+ let cnt = bitmap. trailing_zeros ( ) as usize ;
294+ let ch = chunk[ cnt] ;
295+ reader. eat ( cnt + 1 ) ;
296+
297+ return Some ( ch) ;
298+ }
// Bitmap is zero: the whole chunk is treated as whitespace and consumed.
299+ reader. eat ( 16 )
300+ }
301+
// peek_n(16) returned None (presumably fewer than 16 bytes remain): finish one byte at a time.
302+ while let Some ( ch) = reader. next ( ) {
303+ if !is_whitespace ( ch) {
304+ // first non-whitespace byte in the tail
305+ return Some ( ch) ;
306+ }
307+ }
308+ None
309+ }
310+ }
311+
/// Parser state wrapped around a reader of type R.
/// The whitespace-skipping state that used to live directly in this struct
/// (nospace_bits and nospace_start) is now held by the skipper field.
312+ pub struct Parser < R > {
313+ pub read : R , // the underlying input reader
314+ error_index : usize , // position of the error; the constructor fragment visible in this diff initialises it to usize::MAX
315+ skipper : SpaceSkipper , // whitespace skipper; the cfg attributes on SpaceSkipper pick the bitmap-based or the sve2 implementation at compile time
220316 pub ( crate ) cfg : DeserializeCfg ,
221317}
222318
@@ -244,8 +340,7 @@ where
244340 Self {
245341 read,
246342 error_index : usize:: MAX ,
247- nospace_bits : 0 ,
248- nospace_start : -128 ,
343+ skipper : SpaceSkipper :: new ( ) ,
249344 cfg : DeserializeCfg :: default ( ) ,
250345 }
251346 }
@@ -1306,62 +1401,17 @@ where
13061401
/// Skips whitespace and returns the next non-whitespace byte, or None when the input is exhausted.
/// After this change the method checks up to two bytes one at a time through self.read.next()
/// and then delegates to self.skipper.skip_space, which now holds the bitmap-reuse path,
/// the SIMD scan and the tail loop that the removed lines below used to inline here.
13071402 #[ inline( always) ]
13081403 pub fn skip_space ( & mut self ) -> Option < u8 > {
1309- let reader = & mut self . read ;
1310- // fast path 1: for nospace or single space
1311- // most JSON is like ` "name": "balabala" `
1312- if let Some ( ch) = reader. next ( ) {
1313- if !is_whitespace ( ch) {
1314- return Some ( ch) ;
1315- }
1316- }
1317- if let Some ( ch) = reader. next ( ) {
1404+ if let Some ( ch) = self . read . next ( ) {
13181405 if !is_whitespace ( ch) {
13191406 return Some ( ch) ;
13201407 }
13211408 }
1322-
1323- // fast path 2: reuse the bitmap for short key or numbers
1324- let nospace_offset = ( reader. index ( ) as isize ) - self . nospace_start ;
1325- if nospace_offset < 64 {
1326- let bitmap = {
1327- let mask = !( ( 1 << nospace_offset) - 1 ) ;
1328- self . nospace_bits & mask
1329- } ;
1330- if bitmap != 0 {
1331- let cnt = bitmap. trailing_zeros ( ) as usize ;
1332- let ch = reader. at ( self . nospace_start as usize + cnt) ;
1333- reader. set_index ( self . nospace_start as usize + cnt + 1 ) ;
1334-
1335- return Some ( ch) ;
1336- } else {
1337- // we can still fast skip the marked space in here.
1338- reader. set_index ( self . nospace_start as usize + 64 ) ;
1339- }
1340- }
1341-
1342- // then we use simd to accelerate skipping space
1343- while let Some ( chunk) = reader. peek_n ( 64 ) {
1344- let chunk = unsafe { & * ( chunk. as_ptr ( ) as * const [ _ ; 64 ] ) } ;
1345- let bitmap = unsafe { get_nonspace_bits ( chunk) } ;
1346- if bitmap != 0 {
1347- self . nospace_bits = bitmap;
1348- self . nospace_start = reader. index ( ) as isize ;
1349- let cnt = bitmap. trailing_zeros ( ) as usize ;
1350- let ch = chunk[ cnt] ;
1351- reader. eat ( cnt + 1 ) ;
1352-
1353- return Some ( ch) ;
1354- }
1355- reader. eat ( 64 )
1356- }
1357-
1358- while let Some ( ch) = reader. next ( ) {
1409+ if let Some ( ch) = self . read . next ( ) {
13591410 if !is_whitespace ( ch) {
1360- //
13611411 return Some ( ch) ;
13621412 }
13631413 }
1364- None
1414+ self . skipper . skip_space ( & mut self . read )
13651415 }
13661416
13671417 #[ inline( always) ]
0 commit comments