Skip to content

Commit 707de60

Browse files
authored
feat(rust): support skipping field bytes when deserializing in compatible mode (apache#2545)
## What does this PR do?

Support skipping unneeded field bytes when deserializing in compatible mode.

## Related issues

apache#2531
1 parent c632500 commit 707de60

12 files changed

Lines changed: 202 additions & 96 deletions

File tree

rust/fory-core/src/fory.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ impl Fory {
9090
&self.type_resolver
9191
}
9292

93-
pub fn register<T: 'static + StructSerializer>(&mut self, id: u32) {
93+
pub fn register<T: 'static + StructSerializer>(&mut self, id: i16) {
9494
let type_info = TypeInfo::new::<T>(self, id);
9595
self.type_resolver.register::<T>(type_info, id);
9696
}

rust/fory-core/src/meta/type_meta.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ static ENCODING_OPTIONS: &[Encoding] = &[
3939
Encoding::LowerUpperDigitSpecial,
4040
];
4141

42-
#[derive(Debug, PartialEq, Eq)]
42+
#[derive(Debug, PartialEq, Eq, Clone)]
4343
pub struct FieldType {
4444
pub type_id: i16,
45-
generics: Vec<FieldType>,
45+
pub generics: Vec<FieldType>,
4646
}
4747

4848
impl FieldType {
@@ -128,7 +128,7 @@ impl FieldType {
128128
}
129129
}
130130

131-
#[derive(Debug, PartialEq, Eq)]
131+
#[derive(Debug, PartialEq, Eq, Clone)]
132132
pub struct FieldInfo {
133133
pub field_name: String,
134134
pub field_type: FieldType,
@@ -213,19 +213,19 @@ impl FieldInfo {
213213

214214
#[derive(Debug)]
215215
pub struct TypeMetaLayer {
216-
type_id: u32,
216+
type_id: i16,
217217
field_infos: Vec<FieldInfo>,
218218
}
219219

220220
impl TypeMetaLayer {
221-
pub fn new(type_id: u32, field_infos: Vec<FieldInfo>) -> TypeMetaLayer {
221+
pub fn new(type_id: i16, field_infos: Vec<FieldInfo>) -> TypeMetaLayer {
222222
TypeMetaLayer {
223223
type_id,
224224
field_infos,
225225
}
226226
}
227227

228-
pub fn get_type_id(&self) -> u32 {
228+
pub fn get_type_id(&self) -> i16 {
229229
self.type_id
230230
}
231231

@@ -271,7 +271,7 @@ impl TypeMetaLayer {
271271
if is_register_by_name {
272272
todo!()
273273
} else {
274-
type_id = reader.var_int32() as u32;
274+
type_id = reader.var_int32() as i16;
275275
}
276276
let mut field_infos = Vec::with_capacity(num_fields);
277277
for _ in 0..num_fields {
@@ -293,11 +293,11 @@ impl TypeMeta {
293293
self.layers.first().unwrap().get_field_infos()
294294
}
295295

296-
pub fn get_type_id(&self) -> u32 {
296+
pub fn get_type_id(&self) -> i16 {
297297
self.layers.first().unwrap().get_type_id()
298298
}
299299

300-
pub fn from_fields(type_id: u32, field_infos: Vec<FieldInfo>) -> TypeMeta {
300+
pub fn from_fields(type_id: i16, field_infos: Vec<FieldInfo>) -> TypeMeta {
301301
TypeMeta {
302302
// hash: 0,
303303
layers: vec![TypeMetaLayer::new(type_id, field_infos)],

rust/fory-core/src/resolver/meta_resolver.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use std::rc::Rc;
2424

2525
#[derive(Default)]
2626
pub struct MetaReaderResolver {
27-
reading_type_defs: Vec<Rc<TypeMeta>>,
27+
pub reading_type_defs: Vec<Rc<TypeMeta>>,
2828
}
2929

3030
impl MetaReaderResolver {

rust/fory-core/src/resolver/type_resolver.rs

Lines changed: 10 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@
1818
use super::context::{ReadContext, WriteContext};
1919
use crate::error::Error;
2020
use crate::fory::Fory;
21-
use crate::serializer::{Serializer, StructSerializer};
22-
use crate::types::TypeId;
23-
use chrono::{NaiveDate, NaiveDateTime};
21+
use crate::serializer::StructSerializer;
2422
use std::{any::Any, collections::HashMap};
2523

2624
pub struct Harness {
@@ -50,18 +48,18 @@ impl Harness {
5048

5149
pub struct TypeInfo {
5250
type_def: Vec<u8>,
53-
type_id: u32,
51+
type_id: i16,
5452
}
5553

5654
impl TypeInfo {
57-
pub fn new<T: StructSerializer>(fory: &Fory, type_id: u32) -> TypeInfo {
55+
pub fn new<T: StructSerializer>(fory: &Fory, type_id: i16) -> TypeInfo {
5856
TypeInfo {
59-
type_def: T::type_def(fory),
57+
type_def: T::type_def(fory, type_id),
6058
type_id,
6159
}
6260
}
6361

64-
pub fn get_type_id(&self) -> u32 {
62+
pub fn get_type_id(&self) -> i16 {
6563
self.type_id
6664
}
6765

@@ -70,56 +68,12 @@ impl TypeInfo {
7068
}
7169
}
7270

71+
#[derive(Default)]
7372
pub struct TypeResolver {
74-
serialize_map: HashMap<u32, Harness>,
75-
type_id_map: HashMap<std::any::TypeId, u32>,
73+
serialize_map: HashMap<i16, Harness>,
74+
type_id_map: HashMap<std::any::TypeId, i16>,
7675
type_info_map: HashMap<std::any::TypeId, TypeInfo>,
7776
}
78-
macro_rules! register_harness {
79-
($ty:ty, $id:expr, $map:expr) => {{
80-
fn serializer(this: &dyn std::any::Any, context: &mut WriteContext) {
81-
let this = this.downcast_ref::<$ty>();
82-
match this {
83-
Some(v) => <$ty>::serialize(v, context),
84-
None => todo!(""),
85-
}
86-
}
87-
88-
fn deserializer(context: &mut ReadContext) -> Result<Box<dyn std::any::Any>, Error> {
89-
match <$ty>::deserialize(context) {
90-
Ok(v) => Ok(Box::new(v)),
91-
Err(e) => Err(e),
92-
}
93-
}
94-
95-
$map.insert($id as u32, Harness::new(serializer, deserializer));
96-
}};
97-
}
98-
99-
impl Default for TypeResolver {
100-
fn default() -> Self {
101-
let mut serialize_map = HashMap::new();
102-
103-
register_harness!(bool, TypeId::BOOL, serialize_map);
104-
register_harness!(i8, TypeId::INT8, serialize_map);
105-
register_harness!(i16, TypeId::INT16, serialize_map);
106-
register_harness!(i32, TypeId::INT32, serialize_map);
107-
register_harness!(i64, TypeId::INT64, serialize_map);
108-
register_harness!(f32, TypeId::FLOAT32, serialize_map);
109-
register_harness!(f64, TypeId::FLOAT64, serialize_map);
110-
111-
register_harness!(String, TypeId::STRING, serialize_map);
112-
113-
register_harness!(NaiveDate, TypeId::LOCAL_DATE, serialize_map);
114-
register_harness!(NaiveDateTime, TypeId::TIMESTAMP, serialize_map);
115-
116-
TypeResolver {
117-
serialize_map,
118-
type_id_map: HashMap::new(),
119-
type_info_map: HashMap::new(),
120-
}
121-
}
122-
}
12377

12478
impl TypeResolver {
12579
pub fn get_type_info(&self, type_id: std::any::TypeId) -> &TypeInfo {
@@ -131,7 +85,7 @@ impl TypeResolver {
13185
})
13286
}
13387

134-
pub fn register<T: StructSerializer>(&mut self, type_info: TypeInfo, id: u32) {
88+
pub fn register<T: StructSerializer>(&mut self, type_info: TypeInfo, id: i16) {
13589
fn serializer<T2: 'static + StructSerializer>(this: &dyn Any, context: &mut WriteContext) {
13690
let this = this.downcast_ref::<T2>();
13791
match this {
@@ -161,7 +115,7 @@ impl TypeResolver {
161115
self.get_harness(*self.type_id_map.get(&type_id).unwrap())
162116
}
163117

164-
pub fn get_harness(&self, id: u32) -> Option<&Harness> {
118+
pub fn get_harness(&self, id: i16) -> Option<&Harness> {
165119
self.serialize_map.get(&id)
166120
}
167121
}

rust/fory-core/src/serializer/any.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ impl Serializer for Box<dyn Any> {
7171
context
7272
.get_fory()
7373
.get_type_resolver()
74-
.get_harness(type_id as u32)
74+
.get_harness(type_id)
7575
.unwrap()
7676
.get_deserializer()(context)
7777
}

rust/fory-core/src/serializer/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ mod bool;
2727
mod datetime;
2828
mod list;
2929
mod map;
30+
pub mod nonexistent;
3031
mod number;
3132
mod option;
3233
mod primitive_list;
@@ -92,5 +93,5 @@ where
9293
}
9394

9495
pub trait StructSerializer: Serializer + 'static {
95-
fn type_def(fory: &Fory) -> Vec<u8>;
96+
fn type_def(fory: &Fory, type_id: i16) -> Vec<u8>;
9697
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::ensure;
19+
use crate::error::Error;
20+
use crate::meta::FieldType;
21+
use crate::resolver::context::ReadContext;
22+
use crate::serializer::Serializer;
23+
use crate::types::{RefFlag, TypeId, BASIC_TYPES, COLLECTION_TYPES};
24+
use anyhow::anyhow;
25+
use chrono::{NaiveDate, NaiveDateTime};
26+
27+
macro_rules! basic_type_deserialize {
28+
($tid:expr, $context:expr; $(($ty:ty, $id:ident)),+ $(,)?) => {
29+
$(
30+
if $tid == TypeId::$id {
31+
<$ty>::deserialize($context)?;
32+
return Ok(());
33+
}
34+
)+else {
35+
unreachable!()
36+
}
37+
};
38+
}
39+
40+
pub fn skip_field_value(context: &mut ReadContext, field_type: &FieldType) -> Result<(), Error> {
41+
match TypeId::try_from(field_type.type_id) {
42+
Ok(type_id) => {
43+
if BASIC_TYPES.contains(&type_id) {
44+
basic_type_deserialize!(type_id, context;
45+
(bool, BOOL),
46+
(i8, INT8),
47+
(i16, INT16),
48+
(i32, INT32),
49+
(i64, INT64),
50+
(f32, FLOAT32),
51+
(f64, FLOAT64),
52+
(String, STRING),
53+
(NaiveDate, LOCAL_DATE),
54+
(NaiveDateTime, TIMESTAMP),
55+
);
56+
} else if COLLECTION_TYPES.contains(&type_id) {
57+
let ref_flag = context.reader.i8();
58+
let actual_type_id = context.reader.i16();
59+
let type_id_num = type_id.into();
60+
ensure!(
61+
actual_type_id == type_id_num,
62+
anyhow!("Invalid field type, expected:{type_id_num}, actual:{actual_type_id}")
63+
);
64+
if ref_flag == (RefFlag::NotNullValue as i8)
65+
|| ref_flag == (RefFlag::RefValue as i8)
66+
{
67+
if type_id == TypeId::ARRAY || type_id == TypeId::SET {
68+
let length = context.reader.var_int32() as usize;
69+
println!("skipping array with length {}", length);
70+
for _ in 0..length {
71+
skip_field_value(context, field_type.generics.first().unwrap())?;
72+
}
73+
} else if type_id == TypeId::MAP {
74+
let length = context.reader.var_int32() as usize;
75+
for _ in 0..length {
76+
skip_field_value(context, field_type.generics.first().unwrap())?;
77+
skip_field_value(context, field_type.generics.get(1).unwrap())?;
78+
}
79+
}
80+
Ok(())
81+
} else if ref_flag == (RefFlag::Null as i8) {
82+
Err(anyhow!("Try to deserialize non-option type to null"))?
83+
} else if ref_flag == (RefFlag::Ref as i8) {
84+
Err(Error::Ref)
85+
} else {
86+
Err(anyhow!("Unknown ref flag, value:{ref_flag}"))?
87+
}
88+
} else {
89+
unreachable!()
90+
}
91+
}
92+
Err(_) => {
93+
// skip ref_flag and meta_index
94+
context.reader.i8();
95+
context.reader.i16();
96+
let type_defs: Vec<_> = context.meta_resolver.reading_type_defs.to_vec();
97+
for type_def in type_defs.iter() {
98+
if type_def.get_type_id() == field_type.type_id {
99+
let field_infos: Vec<_> = type_def.get_field_infos().to_vec();
100+
for field_info in field_infos.iter() {
101+
skip_field_value(context, &field_info.field_type)?;
102+
}
103+
}
104+
}
105+
Ok(())
106+
}
107+
}
108+
}

rust/fory-core/src/types.rs

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -112,19 +112,20 @@ pub fn compute_string_hash(s: &str) -> u32 {
112112
hash as u32
113113
}
114114

115-
// const BASIC_TYPES: [FieldType; 11] = [
116-
// FieldType::BOOL,
117-
// FieldType::INT8,
118-
// FieldType::INT16,
119-
// FieldType::INT32,
120-
// FieldType::INT64,
121-
// FieldType::FLOAT,
122-
// FieldType::DOUBLE,
123-
// FieldType::STRING,
124-
// FieldType::BINARY,
125-
// FieldType::DATE,
126-
// FieldType::TIMESTAMP,
127-
// ];
115+
pub const BASIC_TYPES: [TypeId; 10] = [
116+
TypeId::BOOL,
117+
TypeId::INT8,
118+
TypeId::INT16,
119+
TypeId::INT32,
120+
TypeId::INT64,
121+
TypeId::FLOAT32,
122+
TypeId::FLOAT64,
123+
TypeId::STRING,
124+
TypeId::LOCAL_DATE,
125+
TypeId::TIMESTAMP,
126+
];
127+
128+
pub const COLLECTION_TYPES: [TypeId; 3] = [TypeId::ARRAY, TypeId::SET, TypeId::MAP];
128129

129130
pub fn compute_field_hash(hash: u32, id: i16) -> u32 {
130131
let mut new_hash: u64 = (hash as u64) * 31 + (id as u64);

rust/fory-derive/src/object/derive_enum.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use syn::DataEnum;
2121

2222
pub fn gen_type_def(_data_enum: &DataEnum) -> TokenStream {
2323
quote! {
24-
fn type_def(fory: &fory_core::fory::Fory) -> Vec<u8> {
24+
fn type_def(fory: &fory_core::fory::Fory, type_id: i16) -> Vec<u8> {
2525
Vec::new()
2626
}
2727
}

rust/fory-derive/src/object/misc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ fn type_def(fields: &[&Field]) -> TokenStream {
5656
}
5757
});
5858
quote! {
59-
fn type_def(fory: &fory_core::fory::Fory) -> Vec<u8> {
59+
fn type_def(fory: &fory_core::fory::Fory, layer_id: i16) -> Vec<u8> {
6060
fory_core::meta::TypeMeta::from_fields(
61-
0,
61+
layer_id,
6262
vec![#(#field_infos),*]
6363
).to_bytes().unwrap()
6464
}

0 commit comments

Comments
 (0)