@@ -150,7 +150,7 @@ pub(crate) trait Stage1Parse {
150150 type Utf8Validator : ChunkedUtf8Validator ;
151151 type SimdRepresentation ;
152152
153- unsafe fn new ( ptr : & [ u8 ] ) -> Self ;
153+ unsafe fn new ( ptr : [ u8 ; SIMDINPUT_LENGTH ] ) -> Self ;
154154
155155 unsafe fn compute_quote_mask ( quote_bits : u64 ) -> u64 ;
156156
@@ -665,6 +665,7 @@ impl Deserializer<'_> {
665665 #[ cfg_attr( not( feature = "no-inline" ) , inline) ]
666666 pub ( crate ) unsafe fn find_structural_bits (
667667 input : & [ u8 ] ,
668+ len : usize ,
668669 structural_indexes : & mut Vec < u32 > ,
669670 ) -> std:: result:: Result < ( ) , ErrorType > {
670671 // This is a nasty hack, we don't have a chunked implementation for native rust
@@ -675,18 +676,23 @@ impl Deserializer<'_> {
675676 } ;
676677 #[ cfg( not( feature = "portable" ) ) ]
677678 unsafe {
678- Self :: _find_structural_bits :: < impls:: native:: SimdInput > ( input, structural_indexes)
679+ Self :: _find_structural_bits :: < impls:: native:: SimdInput > ( input, len , structural_indexes)
679680 }
680681 }
681682
// Portable-SIMD variant of `find_structural_bits`, compiled when the `portable` feature is on
// and `runtime-detection` is off. It forwards `input`, `len` and `structural_indexes` unchanged
// to `_find_structural_bits` instantiated with `impls::portable::SimdInput`.
// NOTE(review): this wrapper still declares `input: &[u8]`, while the generic
// `_find_structural_bits` shown further down in this diff now takes `&AlignedBuf`; only the
// aarch64 wrapper had its parameter type updated. Confirm this configuration still compiles.
682683 #[ cfg( all( feature = "portable" , not( feature = "runtime-detection" ) ) ) ]
683684 #[ cfg_attr( not( feature = "no-inline" ) , inline) ]
684685 pub ( crate ) unsafe fn find_structural_bits (
685686 input : & [ u8 ] ,
// `len` is passed explicitly instead of being derived from `input.len()` inside the callee.
687+ len : usize ,
686688 structural_indexes : & mut Vec < u32 > ,
687689 ) -> std:: result:: Result < ( ) , ErrorType > {
688690 unsafe {
689- Self :: _find_structural_bits :: < impls:: portable:: SimdInput > ( input, structural_indexes)
691+ Self :: _find_structural_bits :: < impls:: portable:: SimdInput > (
692+ input,
693+ len,
694+ structural_indexes,
695+ )
690696 }
691697 }
692698
@@ -698,9 +704,12 @@ impl Deserializer<'_> {
698704 #[ cfg_attr( not( feature = "no-inline" ) , inline) ]
699705 pub ( crate ) unsafe fn find_structural_bits (
700706 input : & [ u8 ] ,
707+ len : usize ,
701708 structural_indexes : & mut Vec < u32 > ,
702709 ) -> std:: result:: Result < ( ) , ErrorType > {
703- unsafe { Self :: _find_structural_bits :: < impls:: avx2:: SimdInput > ( input, structural_indexes) }
710+ unsafe {
711+ Self :: _find_structural_bits :: < impls:: avx2:: SimdInput > ( input, len, structural_indexes)
712+ }
704713 }
705714
706715 #[ cfg( all(
@@ -712,28 +721,35 @@ impl Deserializer<'_> {
712721 #[ cfg_attr( not( feature = "no-inline" ) , inline) ]
713722 pub ( crate ) unsafe fn find_structural_bits (
714723 input : & [ u8 ] ,
724+ len : usize ,
715725 structural_indexes : & mut Vec < u32 > ,
716726 ) -> std:: result:: Result < ( ) , ErrorType > {
717- unsafe { Self :: _find_structural_bits :: < impls:: sse42:: SimdInput > ( input, structural_indexes) }
727+ unsafe {
728+ Self :: _find_structural_bits :: < impls:: sse42:: SimdInput > ( input, len, structural_indexes)
729+ }
718730 }
719731
// aarch64 (non-`portable`) variant of `find_structural_bits`. It forwards `input`, `len` and
// `structural_indexes` unchanged to `_find_structural_bits` instantiated with
// `impls::neon::SimdInput`.
// NOTE(review): among the wrappers visible in this diff, this is the only one whose `input`
// parameter changes from `&[u8]` to `&AlignedBuf`; the other variants keep `&[u8]`. Confirm the
// signatures are meant to differ per target.
720732 #[ cfg( all( target_arch = "aarch64" , not( feature = "portable" ) ) ) ]
721733 #[ cfg_attr( not( feature = "no-inline" ) , inline) ]
722734 pub ( crate ) unsafe fn find_structural_bits (
723- input : & [ u8 ] ,
735+ input : & AlignedBuf ,
// `len` is passed explicitly instead of being derived from `input.len()` inside the callee.
736+ len : usize ,
724737 structural_indexes : & mut Vec < u32 > ,
725738 ) -> std:: result:: Result < ( ) , ErrorType > {
726- unsafe { Self :: _find_structural_bits :: < impls:: neon:: SimdInput > ( input, structural_indexes) }
739+ unsafe {
740+ Self :: _find_structural_bits :: < impls:: neon:: SimdInput > ( input, len, structural_indexes)
741+ }
727742 }
728743
// WebAssembly `simd128` (non-`portable`) variant of `find_structural_bits`. It forwards `input`,
// `len` and `structural_indexes` unchanged to `_find_structural_bits` instantiated with
// `impls::simd128::SimdInput`.
// NOTE(review): this wrapper still declares `input: &[u8]`, while the generic
// `_find_structural_bits` shown further down in this diff now takes `&AlignedBuf`. Confirm this
// configuration still compiles.
729744 #[ cfg( all( target_feature = "simd128" , not( feature = "portable" ) ) ) ]
730745 #[ cfg_attr( not( feature = "no-inline" ) , inline) ]
731746 pub ( crate ) unsafe fn find_structural_bits (
732747 input : & [ u8 ] ,
// `len` is passed explicitly instead of being derived from `input.len()` inside the callee.
748+ len : usize ,
733749 structural_indexes : & mut Vec < u32 > ,
734750 ) -> std:: result:: Result < ( ) , ErrorType > {
735751 unsafe {
736- Self :: _find_structural_bits :: < impls:: simd128:: SimdInput > ( input, structural_indexes)
752+ Self :: _find_structural_bits :: < impls:: simd128:: SimdInput > ( input, len , structural_indexes)
737753 }
738754 }
739755}
@@ -795,7 +811,7 @@ impl<'de> Deserializer<'de> {
795811 buffer : & mut Buffers ,
796812 tape : & mut Vec < Node < ' de > > ,
797813 ) -> Result < ( ) > {
798- const LOTS_OF_ZOERS : [ u8 ; SIMDINPUT_LENGTH ] = [ 0 ; SIMDINPUT_LENGTH ] ;
814+ const LOTS_OF_ZOERS : [ u8 ; SIMDINPUT_LENGTH ] = [ 0x20 ; SIMDINPUT_LENGTH ] ;
799815 let len = input. len ( ) ;
800816 let simd_safe_len = len + SIMDINPUT_LENGTH ;
801817
@@ -830,7 +846,7 @@ impl<'de> Deserializer<'de> {
830846 // safety: all bytes are initialized
831847 input_buffer. set_len ( simd_safe_len) ;
832848
833- Self :: find_structural_bits ( input, & mut buffer. structural_indexes )
849+ Self :: find_structural_bits ( input_buffer , input. len ( ) , & mut buffer. structural_indexes )
834850 . map_err ( Error :: generic) ?;
835851 } ;
836852
@@ -881,10 +897,11 @@ impl<'de> Deserializer<'de> {
881897 #[ cfg_attr( not( feature = "no-inline" ) , inline) ]
882898 #[ allow( clippy:: cast_possible_truncation) ]
883899 pub ( crate ) unsafe fn _find_structural_bits < S : Stage1Parse > (
884- input : & [ u8 ] ,
900+ input : & AlignedBuf ,
901+ len : usize ,
885902 structural_indexes : & mut Vec < u32 > ,
886903 ) -> std:: result:: Result < ( ) , ErrorType > {
887- let len = input. len ( ) ;
904+ // let len = input.len();
888905 // 8 is a heuristic number to estimate it turns out a rate of 1/8 structural characters
889906 // leads almost never to relocations.
890907 structural_indexes. clear ( ) ;
@@ -916,18 +933,18 @@ impl<'de> Deserializer<'de> {
916933 // expensive carryless multiply in the previous step with this work
917934 let mut structurals: u64 = 0 ;
918935
919- let lenminus64: usize = if len < 64 { 0 } else { len - 64 } ;
936+ // let lenminus64: usize = if len < 64 { 0 } else { len - 64 };
920937 let mut idx: usize = 0 ;
921938 let mut error_mask: u64 = 0 ; // for unescaped characters within strings (ASCII code points < 0x20)
922939
923- while idx < lenminus64 {
940+ while idx <= len / SIMDINPUT_LENGTH {
924941 /*
925942 #ifndef _MSC_VER
926943 __builtin_prefetch(buf + idx + 128);
927944 #endif
928945 */
929- let chunk = unsafe { input. get_kinda_unchecked ( idx..idx + 64 ) } ;
930- unsafe { utf8_validator. update_from_chunks ( chunk) } ;
946+ let chunk: [ u8 ; SIMDINPUT_LENGTH ] = unsafe { input. load_register ( idx) } ;
947+ unsafe { utf8_validator. update_from_chunks ( & chunk) } ;
931948
932949 let input = unsafe { S :: new ( chunk) } ;
933950 // detect odd sequences of backslashes
@@ -946,7 +963,7 @@ impl<'de> Deserializer<'de> {
946963
947964 // take the previous iterations structural bits, not our current iteration,
948965 // and flatten
949- unsafe { S :: flatten_bits ( structural_indexes, idx as u32 , structurals) } ;
966+ unsafe { S :: flatten_bits ( structural_indexes, ( idx * 64 ) as u32 , structurals) } ;
950967
951968 let mut whitespace: u64 = 0 ;
952969 unsafe { input. find_whitespace_and_structurals ( & mut whitespace, & mut structurals) } ;
@@ -959,60 +976,15 @@ impl<'de> Deserializer<'de> {
959976 quote_bits,
960977 & mut prev_iter_ends_pseudo_pred,
961978 ) ;
962- idx += SIMDINPUT_LENGTH ;
979+ idx += 1 ;
963980 }
964981
965- // we use a giant copy-paste which is ugly.
966- // but otherwise the string needs to be properly padded or else we
967- // risk invalidating the UTF-8 checks.
968- if idx < len {
969- let mut tmpbuf: [ u8 ; SIMDINPUT_LENGTH ] = [ 0x20 ; SIMDINPUT_LENGTH ] ;
970- unsafe {
971- tmpbuf
972- . as_mut_ptr ( )
973- . copy_from ( input. as_ptr ( ) . add ( idx) , len - idx) ;
974- } ;
975- unsafe { utf8_validator. update_from_chunks ( & tmpbuf) } ;
976-
977- let input = unsafe { S :: new ( & tmpbuf) } ;
978-
979- // detect odd sequences of backslashes
980- let odd_ends: u64 =
981- input. find_odd_backslash_sequences ( & mut prev_iter_ends_odd_backslash) ;
982-
983- // detect insides of quote pairs ("quote_mask") and also our quote_bits
984- // themselves
985- let mut quote_bits: u64 = 0 ;
986- let quote_mask: u64 = input. find_quote_mask_and_bits (
987- odd_ends,
988- & mut prev_iter_inside_quote,
989- & mut quote_bits,
990- & mut error_mask,
991- ) ;
992-
993- // take the previous iterations structural bits, not our current iteration,
994- // and flatten
995- unsafe { S :: flatten_bits ( structural_indexes, idx as u32 , structurals) } ;
996-
997- let mut whitespace: u64 = 0 ;
998- unsafe { input. find_whitespace_and_structurals ( & mut whitespace, & mut structurals) } ;
999-
1000- // fixup structurals to reflect quotes and add pseudo-structural characters
1001- structurals = S :: finalize_structurals (
1002- structurals,
1003- whitespace,
1004- quote_mask,
1005- quote_bits,
1006- & mut prev_iter_ends_pseudo_pred,
1007- ) ;
1008- idx += SIMDINPUT_LENGTH ;
1009- }
1010982 // This test isn't in upstream, for some reason the error mask is set for then.
1011983 if prev_iter_inside_quote != 0 {
1012984 return Err ( ErrorType :: Syntax ) ;
1013985 }
1014986 // finally, flatten out the remaining structurals from the last iteration
1015- unsafe { S :: flatten_bits ( structural_indexes, idx as u32 , structurals) } ;
987+ unsafe { S :: flatten_bits ( structural_indexes, ( idx * 64 ) as u32 , structurals) } ;
1016988
1017989 // a valid JSON file cannot have zero structural indexes - we should have
1018990 // found something (note that we compare to 1 as we always add the root!)
@@ -1051,22 +1023,40 @@ impl AlignedBuf {
10511023 /// Creates a new buffer that is aligned with the simd register size
// The added lines round `capacity` up to the next multiple of `SIMDINPUT_LENGTH`, build the
// layout with `SIMDINPUT_LENGTH` alignment (previously `SIMDJSON_PADDING`), allocate it, and
// return a buffer with `len` 0.
10521024 #[ must_use]
10531025 pub fn with_capacity ( capacity : usize ) -> Self {
1054- let Ok ( layout) = Layout :: from_size_align ( capacity, SIMDJSON_PADDING ) else {
1055- Self :: capacity_overflow ( )
// Round the requested capacity up to a whole number of `SIMDINPUT_LENGTH`-byte blocks.
1026+ let offset = capacity % SIMDINPUT_LENGTH ;
1027+ let capacity = if offset == 0 {
1028+ capacity
1029+ } else {
// NOTE(review): this addition runs before any overflow check, so a `capacity` close to
// `usize::MAX` can overflow here — consider `checked_add` routed to `capacity_overflow()`.
1030+ capacity + SIMDINPUT_LENGTH - offset
10561031 } ;
1032+
// Extra guard that only applies on targets where `usize` is narrower than 8 bytes.
10571033 if mem:: size_of :: < usize > ( ) < 8 && capacity > isize:: MAX as usize {
10581034 Self :: capacity_overflow ( )
10591035 }
1036+ let layout = match Layout :: from_size_align ( capacity, SIMDINPUT_LENGTH ) {
1037+ Ok ( layout) => layout,
1038+ Err ( _) => Self :: capacity_overflow ( ) ,
1039+ } ;
1040+
// NOTE(review): a requested capacity of 0 stays 0 after rounding, so `alloc` would be called
// with a zero-size layout — confirm callers never pass 0, or special-case it.
1041+ let inner = match unsafe { NonNull :: new ( alloc ( layout) ) } {
1042+ Some ( ptr) => ptr,
// A null pointer from `alloc` is reported through `handle_alloc_error`.
1043+ None => handle_alloc_error ( layout) ,
1044+ } ;
1045+ Self {
1046+ layout,
1047+ capacity,
1048+ len : 0 ,
1049+ inner,
1050+ }
1051+ }
1052+
// Copies out the `idx`-th `SIMDINPUT_LENGTH`-byte block of the buffer by value: the base
// pointer is cast to `*mut [u8; SIMDINPUT_LENGTH]`, advanced by `idx` whole blocks, and read.
//
// Safety: no bounds check is performed. The caller must ensure that the byte range
// `idx * SIMDINPUT_LENGTH .. (idx + 1) * SIMDINPUT_LENGTH` lies inside the allocation and has
// been initialized.
// NOTE(review): the removed (`-`) lines interleaved below belong to the previous body of
// `with_capacity`, not to this function.
1053+ unsafe fn load_register ( & self , idx : usize ) -> [ u8 ; SIMDINPUT_LENGTH ] {
10601054 unsafe {
1061- let Some ( inner) = NonNull :: new ( alloc ( layout) ) else {
1062- handle_alloc_error ( layout)
1063- } ;
1064- Self {
1065- layout,
1066- capacity,
1067- len : 0 ,
1068- inner,
1069- }
1055+ self . inner
1056+ . as_ptr ( )
1057+ . cast :: < [ u8 ; SIMDINPUT_LENGTH ] > ( )
1058+ . add ( idx)
1059+ . read ( )
10701060 }
10711061 }
10721062
0 commit comments