Skip to content

Commit 3e2ed31

Browse files
authored
contentviews: fix protobuf number encoding (#255)
* contentviews: fix protobuf number encoding * fix nits
1 parent 6712553 commit 3e2ed31

6 files changed

Lines changed: 142 additions & 69 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
## Unreleased: mitmproxy_rs next
22

3+
- Various fixes for Protobuf number encoding.
34

45
## 29 April 2025: mitmproxy_rs 0.12.2
56

mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value {
1414
let mut ret = Mapping::new();
1515

1616
for field in message.descriptor_dyn().fields() {
17-
let key = if field.name().starts_with("unknown_field_") {
17+
let is_unknown_field = field.name().starts_with("unknown_field_");
18+
let key = if is_unknown_field {
1819
Value::from(field.number())
1920
} else {
2021
Value::from(field.name())
@@ -28,7 +29,7 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value {
2829
let value = match field.get_reflect(message) {
2930
ReflectFieldRef::Optional(x) => {
3031
if let Some(x) = x.value() {
31-
value_to_yaml(x, field_type)
32+
value_to_yaml(x, field_type, is_unknown_field)
3233
} else {
3334
continue;
3435
}
@@ -39,7 +40,7 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value {
3940
}
4041
Value::Sequence(
4142
x.into_iter()
42-
.map(|x| value_to_yaml(x, field_type))
43+
.map(|x| value_to_yaml(x, field_type, is_unknown_field))
4344
.collect(),
4445
)
4546
}
@@ -49,7 +50,12 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value {
4950
}
5051
Value::Mapping(
5152
x.into_iter()
52-
.map(|(k, v)| (value_to_yaml(k, field_type), value_to_yaml(v, field_type)))
53+
.map(|(k, v)| {
54+
(
55+
value_to_yaml(k, field_type, is_unknown_field),
56+
value_to_yaml(v, field_type, is_unknown_field),
57+
)
58+
})
5359
.collect(),
5460
)
5561
}
@@ -59,10 +65,10 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value {
5965
Value::Mapping(ret)
6066
}
6167

62-
fn value_to_yaml(x: ReflectValueRef, field_type: Type) -> Value {
68+
fn value_to_yaml(x: ReflectValueRef, field_type: Type, is_unknown: bool) -> Value {
6369
match x {
64-
ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type),
65-
ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type),
70+
ReflectValueRef::U32(x) => tag_number(Number::from(x), field_type, is_unknown),
71+
ReflectValueRef::U64(x) => tag_number(Number::from(x), field_type, is_unknown),
6672
ReflectValueRef::I32(x) => Value::Number(Number::from(x)),
6773
ReflectValueRef::I64(x) => Value::Number(Number::from(x)),
6874
ReflectValueRef::F32(x) => Value::Number(Number::from(x)),
@@ -81,20 +87,23 @@ fn value_to_yaml(x: ReflectValueRef, field_type: Type) -> Value {
8187
}
8288
}
8389

84-
fn tag_number(value: Value, field_type: Type) -> Value {
90+
fn tag_number(number: Number, field_type: Type, is_unknown: bool) -> Value {
91+
if !is_unknown {
92+
return Value::Number(number);
93+
}
8594
match field_type {
8695
TYPE_UINT64 => Value::Tagged(Box::new(TaggedValue {
8796
tag: tags::VARINT.clone(),
88-
value,
97+
value: Value::Number(number),
8998
})),
9099
TYPE_FIXED64 => Value::Tagged(Box::new(TaggedValue {
91100
tag: tags::FIXED64.clone(),
92-
value,
101+
value: Value::Number(number),
93102
})),
94103
TYPE_FIXED32 => Value::Tagged(Box::new(TaggedValue {
95104
tag: tags::FIXED32.clone(),
96-
value,
105+
value: Value::Number(number),
97106
})),
98-
_ => value,
107+
_ => Value::Number(number),
99108
}
100109
}

mitmproxy-contentviews/src/protobuf/reencode.rs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,34 @@ fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> anyh
7878
.context("Invalid hex string")?;
7979
UnknownValue::LengthDelimited(value)
8080
} else if t.tag == *tags::FIXED32 {
81-
let value = match t.value {
82-
Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(),
83-
_ => bail!("Fixed32 data is not a u32"),
81+
let Value::Number(n) = t.value else {
82+
bail!("!fixed32 is not a number");
8483
};
85-
UnknownValue::Fixed32(value as u32)
84+
let value = n
85+
.as_u64()
86+
.map(|n| n as u32)
87+
.or_else(|| n.as_i64().map(|s| s as u32))
88+
.or_else(|| n.as_f64().map(|f| (f as f32).to_bits()))
89+
.context("Failed to convert !fixed32 value to a valid number")?;
90+
UnknownValue::Fixed32(value)
8691
} else if t.tag == *tags::FIXED64 {
87-
let value = match t.value {
88-
Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(),
89-
_ => bail!("Fixed64 data is not a u64"),
92+
let Value::Number(n) = t.value else {
93+
bail!("!fixed64 is not a number");
9094
};
95+
let value = n
96+
.as_u64()
97+
.or_else(|| n.as_i64().map(|s| s as u64))
98+
.or_else(|| n.as_f64().map(|f| f.to_bits()))
99+
.context("Failed to convert !fixed64 value to a valid number")?;
91100
UnknownValue::Fixed64(value)
101+
} else if t.tag == *tags::ZIGZAG {
102+
let Value::Number(n) = t.value else {
103+
bail!("!sint is not a number");
104+
};
105+
let Some(n) = n.as_i64() else {
106+
bail!("!sint is not an integer");
107+
};
108+
UnknownValue::Varint(encode_zigzag64(n))
92109
} else {
93110
log::info!("Unexpected YAML tag {}, discarding.", t.tag);
94111
return add_field(message, field_num, t.value);
@@ -154,3 +171,8 @@ fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue {
154171
UnknownValue::double(n.as_f64().expect("as_f64 never fails"))
155172
}
156173
}
174+
175+
// Zigzag-encode a 64-bit integer
176+
fn encode_zigzag64(n: i64) -> u64 {
177+
((n << 1) ^ (n >> 63)) as u64
178+
}

mitmproxy-contentviews/src/protobuf/view_grpc.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ mod tests {
116116
use super::*;
117117
use crate::test::TestMetadata;
118118

119-
const TEST_YAML: &str = "1: 150\n\n---\n\n1: 150\n";
119+
const TEST_YAML: &str = "1: 150 # !sint: 75\n\n---\n\n1: 150 # !sint: 75\n";
120120
const TEST_GRPC: &[u8] = &[
121121
0, 0, 0, 0, 3, 8, 150, 1, // first message
122122
0, 0, 0, 0, 3, 8, 150, 1, // second message
@@ -149,14 +149,14 @@ mod tests {
149149
fn test_prettify_gzip() {
150150
let metadata = TestMetadata::default().with_header("grpc-encoding", "gzip");
151151
let res = GRPC.prettify(TEST_GZIP, &metadata).unwrap();
152-
assert_eq!(res, "1: 150\n");
152+
assert_eq!(res, "1: 150 # !sint: 75\n");
153153
}
154154

155155
#[test]
156156
fn test_prettify_deflate() {
157157
let metadata = TestMetadata::default().with_header("grpc-encoding", "deflate");
158158
let res = GRPC.prettify(TEST_DEFLATE, &metadata).unwrap();
159-
assert_eq!(res, "1: 150\n");
159+
assert_eq!(res, "1: 150 # !sint: 75\n");
160160
}
161161

162162
#[test]

mitmproxy-contentviews/src/protobuf/view_protobuf.rs

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ pub(super) mod tags {
1515

1616
pub static BINARY: LazyLock<Tag> = LazyLock::new(|| Tag::new("binary"));
1717
pub static VARINT: LazyLock<Tag> = LazyLock::new(|| Tag::new("varint"));
18+
pub static ZIGZAG: LazyLock<Tag> = LazyLock::new(|| Tag::new("sint"));
1819
pub static FIXED32: LazyLock<Tag> = LazyLock::new(|| Tag::new("fixed32"));
1920
pub static FIXED64: LazyLock<Tag> = LazyLock::new(|| Tag::new("fixed64"));
2021

@@ -122,30 +123,35 @@ mod tests {
122123
};
123124
}
124125

125-
test_roundtrip!(varint, b"\x08\x96\x01", "1: 150\n");
126-
test_roundtrip!(varint_negative, b"\x08\x0B", "1: 11 # signed: -6\n");
126+
test_roundtrip!(varint, b"\x08\x96\x01", "1: 150 # !sint: 75\n");
127+
test_roundtrip!(varint_zigzag_negative, b"\x08\x0B", "1: 11 # !sint: -6\n");
128+
test_roundtrip!(
129+
varint_int64_negative,
130+
b"\x08\xfe\xff\xff\xff\xff\xff\xff\xff\xff\x01",
131+
"1: -2 # u64: 18446744073709551614\n"
132+
);
127133
test_roundtrip!(binary, b"\x32\x03\x01\x02\x03", "6: !binary '010203'\n");
128134
test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F", "1: hello\n");
129-
test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42\n");
135+
test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42 # !sint: 21\n");
130136
test_roundtrip!(
131137
nested_twice,
132138
b"\x2A\x04\x2A\x02\x08\x2A",
133-
"5:\n 5:\n 1: 42\n"
139+
"5:\n 5:\n 1: 42 # !sint: 21\n"
134140
);
135141
test_roundtrip!(
136142
fixed64,
137143
b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF",
138-
"3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n"
144+
"3: !fixed64 -1.0 # u64: 13830554455654793216, i64: -4616189618054758400\n"
139145
);
140146
test_roundtrip!(
141147
fixed64_positive,
142148
b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40",
143-
"3: !fixed64 4614256650576692846 # double: 3.14159\n"
149+
"3: !fixed64 3.14159 # u64: 4614256650576692846\n"
144150
);
145151
test_roundtrip!(
146152
fixed64_no_float,
147153
b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF",
148-
"3: !fixed64 18446744073709551615 # i64: -1\n"
154+
"3: !fixed64 -1 # u64: 18446744073709551615\n"
149155
);
150156
test_roundtrip!(
151157
fixed64_positive_no_float,
@@ -155,17 +161,17 @@ mod tests {
155161
test_roundtrip!(
156162
fixed32,
157163
b"\x15\x00\x00\x80\xBF",
158-
"2: !fixed32 3212836864 # float: -1, i32: -1082130432\n"
164+
"2: !fixed32 -1.0 # u32: 3212836864, i32: -1082130432\n"
159165
);
160166
test_roundtrip!(
161167
fixed32_positive,
162168
b"\x15\xD0\x0F\x49\x40",
163-
"2: !fixed32 1078530000 # float: 3.14159\n"
169+
"2: !fixed32 3.14159 # u32: 1078530000\n"
164170
);
165171
test_roundtrip!(
166172
fixed32_no_float,
167173
b"\x15\xFF\xFF\xFF\xFF",
168-
"2: !fixed32 4294967295 # i32: -1\n"
174+
"2: !fixed32 -1 # u32: 4294967295\n"
169175
);
170176
test_roundtrip!(
171177
fixed32_positive_no_float,
@@ -182,7 +188,7 @@ mod tests {
182188
test_roundtrip!(
183189
repeated_varint,
184190
b"\x08\x01\x08\x02\x08\x03",
185-
"1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"
191+
"1:\n- 1 # !sint: -1\n- 2 # !sint: 1\n- 3 # !sint: -2\n"
186192
);
187193

188194
#[test]

mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs

Lines changed: 71 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,101 @@
11
/// YAML value => prettified text
22
use crate::protobuf::view_protobuf::tags;
33
use regex::Captures;
4+
use std::fmt::{Display, Formatter};
5+
6+
/// Collect all representations of a number and output the "best" one as the YAML value
7+
/// and the rest as comments.
8+
struct NumReprs(Vec<(&'static str, String)>);
9+
10+
impl NumReprs {
11+
fn new(k: &'static str, v: impl ToString) -> Self {
12+
let mut inst = Self(Vec::with_capacity(3));
13+
inst.push(k, v);
14+
inst
15+
}
16+
fn push(&mut self, k: &'static str, v: impl ToString) {
17+
self.0.push((k, v.to_string()));
18+
}
19+
}
20+
21+
impl Display for NumReprs {
22+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
23+
// We first sort by t.len(), which is a hack to make sure that sint is not used
24+
// as the main representation.
25+
let (min_typ, min_val) = self
26+
.0
27+
.iter()
28+
.min_by_key(|(t, v)| (t.len(), v.len()))
29+
.unwrap();
30+
let mut i = self.0.iter().filter(|(t, _)| t != min_typ);
31+
32+
write!(f, "{}", min_val)?;
33+
if let Some((t, v)) = i.next() {
34+
write!(f, " # {}: {}", t, v)?;
35+
}
36+
for (t, v) in i {
37+
write!(f, ", {}: {}", t, v)?;
38+
}
39+
Ok(())
40+
}
41+
}
442

543
// Helper method to apply regex replacements to the YAML output
644
pub(super) fn apply_replacements(yaml_str: &str) -> anyhow::Result<String> {
745
// Replace !fixed32 tags with comments showing float and i32 interpretations
846
let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| {
947
let value = caps[1].parse::<u32>().unwrap_or_default();
48+
let mut repr = NumReprs::new("u32", value);
49+
1050
let float_value = f32::from_bits(value);
11-
let i32_value = value as i32;
51+
if !float_value.is_nan() {
52+
let mut float = format!("{}", float_value);
53+
if !float.contains(".") {
54+
float.push_str(".0");
55+
}
56+
repr.push("f32", float);
57+
}
1258

13-
if !float_value.is_nan() && float_value < 0.0 {
14-
format!(
15-
"{} {} # float: {}, i32: {}",
16-
*tags::FIXED32,
17-
value,
18-
float_value,
19-
i32_value
20-
)
21-
} else if !float_value.is_nan() {
22-
format!("{} {} # float: {}", *tags::FIXED32, value, float_value)
23-
} else if i32_value < 0 {
24-
format!("{} {} # i32: {}", *tags::FIXED32, value, i32_value)
25-
} else {
26-
format!("{} {}", *tags::FIXED32, value)
59+
if value.leading_zeros() == 0 {
60+
repr.push("i32", value as i32);
2761
}
62+
format!("{} {}", *tags::FIXED32, repr)
2863
});
2964

3065
// Replace !fixed64 tags with comments showing double and i64 interpretations
3166
let with_fixed64 = tags::FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| {
3267
let value = caps[1].parse::<u64>().unwrap_or_default();
68+
let mut repr = NumReprs::new("u64", value);
69+
3370
let double_value = f64::from_bits(value);
34-
let i64_value = value as i64;
71+
if !double_value.is_nan() {
72+
let mut double = format!("{}", double_value);
73+
if !double.contains(".") {
74+
double.push_str(".0");
75+
}
76+
repr.push("f64", double);
77+
}
3578

36-
if !double_value.is_nan() && double_value < 0.0 {
37-
format!(
38-
"{} {} # double: {}, i64: {}",
39-
*tags::FIXED64,
40-
value,
41-
double_value,
42-
i64_value
43-
)
44-
} else if !double_value.is_nan() {
45-
format!("{} {} # double: {}", *tags::FIXED64, value, double_value)
46-
} else if i64_value < 0 {
47-
format!("{} {} # i64: {}", *tags::FIXED64, value, i64_value)
48-
} else {
49-
format!("{} {}", *tags::FIXED64, value)
79+
if value.leading_zeros() == 0 {
80+
repr.push("i64", value as i64);
5081
}
82+
format!("{} {}", *tags::FIXED64, repr)
5183
});
5284

5385
// Replace !varint tags with comments showing signed interpretation if different
5486
let with_varint = tags::VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| {
55-
let unsigned_value = caps[1].parse::<u64>().unwrap_or_default();
56-
let i64_zigzag = decode_zigzag64(unsigned_value);
87+
let value = caps[1].parse::<u64>().unwrap_or_default();
88+
let mut repr = NumReprs::new("u64", value);
5789

58-
// Only show signed value if it's different from unsigned
59-
if i64_zigzag < 0 {
60-
format!("{} # signed: {}", unsigned_value, i64_zigzag)
90+
if value.leading_zeros() == 0 {
91+
repr.push("i64", value as i64);
92+
// We only show u64 and i64 reprs if the leading bit is a 1.
93+
// It could technically be zigzag, but the odds are quite low.
6194
} else {
62-
unsigned_value.to_string()
95+
repr.push("!sint", decode_zigzag64(value));
6396
}
97+
98+
repr.to_string()
6499
});
65100

66101
Ok(with_varint.to_string())

0 commit comments

Comments
 (0)