Skip to content

Commit f9e8a66

Browse files
Merge pull request #46 from andersfugmann/andersfugmann/fix_zigzag_decoding
Andersfugmann/fix zigzag decoding
2 parents 27b7eda + 7d742e6 commit f9e8a66

File tree

5 files changed

+153
-42
lines changed

5 files changed

+153
-42
lines changed

Changelog.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
behaviour.
88
- Improve copying of comments from .proto files into ocaml code using
99
omd to parse markdown
10-
10+
- Fix problem with deserializing large singed integers (#45)
1111

1212
## 6.1.0: 2024-04-25
1313
- Fix name resolution leading to wrongly mapped names

src/ocaml_protoc_plugin/deserialize.ml

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ let error_required_field_missing index spec = Result.raise (`Required_field_miss
3030
let decode_zigzag v =
3131
let open Infix.Int64 in
3232
match v land 0x01L = 0L with
33-
| true -> v / 2L
34-
| false -> (v / 2L * -1L) - 1L
33+
| true -> v lsr 1
34+
| false -> (v lsr 1 * -1L) - 1L
3535

3636
let decode_zigzag_unboxed v =
3737
match v land 0x01 = 0 with
38-
| true -> v / 2
39-
| false -> (v / 2 * -1) - 1
38+
| true -> v lsr 1
39+
| false -> (v lsr 1 * -1) - 1
4040

4141
let int_of_uint32 =
4242
let open Int32 in
@@ -326,3 +326,59 @@ let deserialize_fast: type constr a. (constr, a) compound_list -> constr -> Read
326326
let extension_ranges = extension_ranges spec in
327327
let values = make_values spec in
328328
fun reader -> deserialize_fast extension_ranges values constr reader
329+
330+
let%expect_test "zigzag decoding" =
331+
let n2l = Int64.of_int in
332+
let i2l = Int64.of_int32 in
333+
let test_values =
334+
[Int64.min_int; n2l Int.min_int; i2l Int32.min_int; -2L;
335+
0L; 3L; i2l Int32.max_int; n2l Int.max_int; Int64.max_int]
336+
|> List.map ~f:(
337+
let open Infix.Int64 in
338+
function
339+
| v when v > 0L -> [pred v; v]
340+
| v -> [v; succ v]
341+
)
342+
|> List.concat
343+
in
344+
List.iter ~f:(fun vl -> Printf.printf "zigzag_decoding(0x%016Lx) = 0x%016Lx\n" vl (decode_zigzag vl)) test_values;
345+
List.iter ~f:(fun v -> Printf.printf "zigzag_decoding_unboxed(0x%016x) = 0x%016x\n" (Int64.to_int v) (Int64.to_int v |> decode_zigzag_unboxed)) test_values;
346+
(* The results should display alternation between positive and negative numbers. If the right most bit is set, the number is negative *)
347+
[%expect {|
348+
zigzag_decoding(0x8000000000000000) = 0x4000000000000000
349+
zigzag_decoding(0x8000000000000001) = 0xbfffffffffffffff
350+
zigzag_decoding(0xc000000000000000) = 0x6000000000000000
351+
zigzag_decoding(0xc000000000000001) = 0x9fffffffffffffff
352+
zigzag_decoding(0xffffffff80000000) = 0x7fffffffc0000000
353+
zigzag_decoding(0xffffffff80000001) = 0x800000003fffffff
354+
zigzag_decoding(0xfffffffffffffffe) = 0x7fffffffffffffff
355+
zigzag_decoding(0xffffffffffffffff) = 0x8000000000000000
356+
zigzag_decoding(0x0000000000000000) = 0x0000000000000000
357+
zigzag_decoding(0x0000000000000001) = 0xffffffffffffffff
358+
zigzag_decoding(0x0000000000000002) = 0x0000000000000001
359+
zigzag_decoding(0x0000000000000003) = 0xfffffffffffffffe
360+
zigzag_decoding(0x000000007ffffffe) = 0x000000003fffffff
361+
zigzag_decoding(0x000000007fffffff) = 0xffffffffc0000000
362+
zigzag_decoding(0x3ffffffffffffffe) = 0x1fffffffffffffff
363+
zigzag_decoding(0x3fffffffffffffff) = 0xe000000000000000
364+
zigzag_decoding(0x7ffffffffffffffe) = 0x3fffffffffffffff
365+
zigzag_decoding(0x7fffffffffffffff) = 0xc000000000000000
366+
zigzag_decoding_unboxed(0x0000000000000000) = 0x0000000000000000
367+
zigzag_decoding_unboxed(0x0000000000000001) = 0x7fffffffffffffff
368+
zigzag_decoding_unboxed(0x4000000000000000) = 0x2000000000000000
369+
zigzag_decoding_unboxed(0x4000000000000001) = 0x5fffffffffffffff
370+
zigzag_decoding_unboxed(0x7fffffff80000000) = 0x3fffffffc0000000
371+
zigzag_decoding_unboxed(0x7fffffff80000001) = 0x400000003fffffff
372+
zigzag_decoding_unboxed(0x7ffffffffffffffe) = 0x3fffffffffffffff
373+
zigzag_decoding_unboxed(0x7fffffffffffffff) = 0x4000000000000000
374+
zigzag_decoding_unboxed(0x0000000000000000) = 0x0000000000000000
375+
zigzag_decoding_unboxed(0x0000000000000001) = 0x7fffffffffffffff
376+
zigzag_decoding_unboxed(0x0000000000000002) = 0x0000000000000001
377+
zigzag_decoding_unboxed(0x0000000000000003) = 0x7ffffffffffffffe
378+
zigzag_decoding_unboxed(0x000000007ffffffe) = 0x000000003fffffff
379+
zigzag_decoding_unboxed(0x000000007fffffff) = 0x7fffffffc0000000
380+
zigzag_decoding_unboxed(0x3ffffffffffffffe) = 0x1fffffffffffffff
381+
zigzag_decoding_unboxed(0x3fffffffffffffff) = 0x6000000000000000
382+
zigzag_decoding_unboxed(0x7ffffffffffffffe) = 0x3fffffffffffffff
383+
zigzag_decoding_unboxed(0x7fffffffffffffff) = 0x4000000000000000
384+
|}]

src/ocaml_protoc_plugin/infix.ml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ module Int64 = struct
99
let (/) = div
1010
let ( * ) = mul
1111
let (-) = sub
12+
let succ = succ
13+
let pred = pred
1214
end
1315

1416
module Int = struct
@@ -22,4 +24,6 @@ module Int = struct
2224
let (/) = div
2325
let ( * ) = mul
2426
let (-) = sub
27+
let succ = succ
28+
let pred = pred
2529
end

src/ocaml_protoc_plugin/serialize.ml

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ let write_fixed64 ~f v =
1616
let write_fixed32 ~f v =
1717
Writer.write_fixed32_value (f v)
1818

19-
let zigzag_encoding v =
19+
let encode_zigzag v =
2020
let open Infix.Int64 in
2121
let v = match v < 0L with
2222
| true -> v lsl 1 lxor (-1L)
2323
| false -> v lsl 1
2424
in
2525
v
2626

27-
let zigzag_encoding_unboxed v =
27+
let encode_zigzag_unboxed v =
2828
let v = match v < 0 with
2929
| true -> v lsl 1 lxor (-1)
3030
| false -> v lsl 1
@@ -55,16 +55,16 @@ let write_value : type a b. (a, b) spec -> a -> Writer.t -> unit = function
5555
| SFixed32_int -> write_fixed32 ~f:Int32.of_int
5656
| Int64 -> Writer.write_varint_value
5757
| UInt64 -> Writer.write_varint_value
58-
| SInt64 -> write_varint ~f:zigzag_encoding
58+
| SInt64 -> write_varint ~f:encode_zigzag
5959
| Int32 -> write_varint_unboxed ~f:Int32.to_int
6060
| UInt32 -> write_varint_unboxed ~f:Int32.to_int
61-
| SInt32 -> write_varint_unboxed ~f:(Int32.to_int @@ zigzag_encoding_unboxed)
61+
| SInt32 -> write_varint_unboxed ~f:(Int32.to_int @@ encode_zigzag_unboxed)
6262
| Int64_int -> Writer.write_varint_unboxed_value
6363
| UInt64_int -> Writer.write_varint_unboxed_value
6464
| Int32_int -> Writer.write_varint_unboxed_value
6565
| UInt32_int -> Writer.write_varint_unboxed_value
66-
| SInt64_int -> write_varint_unboxed ~f:zigzag_encoding_unboxed
67-
| SInt32_int -> write_varint_unboxed ~f:zigzag_encoding_unboxed
66+
| SInt64_int -> write_varint_unboxed ~f:encode_zigzag_unboxed
67+
| SInt32_int -> write_varint_unboxed ~f:encode_zigzag_unboxed
6868

6969
| Bool -> write_varint_unboxed ~f:(function true -> 1 | false -> 0)
7070
| String -> fun v -> Writer.write_length_delimited_value ~data:v ~offset:0 ~len:(String.length v)
@@ -177,28 +177,56 @@ let rec serialize: type a. (a, unit) compound_list -> Writer.t -> a = function
177177
cont writer
178178

179179
let%expect_test "zigzag encoding" =
180-
let test vl =
181-
let v = Int64.to_int vl in
182-
Printf.printf "zigzag_encoding(%LdL) = %LdL\n" vl (zigzag_encoding vl);
183-
Printf.printf "zigzag_encoding_unboxed(%d) = %d\n" v (zigzag_encoding_unboxed v);
180+
let n2l = Int64.of_int in
181+
let i2l = Int64.of_int32 in
182+
let test_values =
183+
[Int64.min_int; n2l Int.min_int; i2l Int32.min_int; -2L;
184+
0L; 3L; i2l Int32.max_int; n2l Int.max_int; Int64.max_int]
185+
|> List.map ~f:(
186+
let open Infix.Int64 in
187+
function
188+
| v when v > 0L -> [pred v; v]
189+
| v -> [v; succ v]
190+
)
191+
|> List.concat
184192
in
185-
List.iter ~f:test [0L; -1L; 1L; -2L; 2L; 2147483647L; -2147483648L; Int64.max_int; Int64.min_int; ];
193+
List.iter ~f:(fun vl -> Printf.printf "zigzag_encoding 0x%016Lx = 0x%016Lx\n" vl (encode_zigzag vl)) test_values;
194+
List.iter ~f:(fun v -> Printf.printf "zigzag_encoding_unboxed 0x%016x = 0x%016x\n" (Int64.to_int v) (Int64.to_int v |> encode_zigzag_unboxed)) test_values;
186195
[%expect {|
187-
zigzag_encoding(0L) = 0L
188-
zigzag_encoding_unboxed(0) = 0
189-
zigzag_encoding(-1L) = 1L
190-
zigzag_encoding_unboxed(-1) = 1
191-
zigzag_encoding(1L) = 2L
192-
zigzag_encoding_unboxed(1) = 2
193-
zigzag_encoding(-2L) = 3L
194-
zigzag_encoding_unboxed(-2) = 3
195-
zigzag_encoding(2L) = 4L
196-
zigzag_encoding_unboxed(2) = 4
197-
zigzag_encoding(2147483647L) = 4294967294L
198-
zigzag_encoding_unboxed(2147483647) = 4294967294
199-
zigzag_encoding(-2147483648L) = 4294967295L
200-
zigzag_encoding_unboxed(-2147483648) = 4294967295
201-
zigzag_encoding(9223372036854775807L) = -2L
202-
zigzag_encoding_unboxed(-1) = 1
203-
zigzag_encoding(-9223372036854775808L) = -1L
204-
zigzag_encoding_unboxed(0) = 0 |}]
196+
zigzag_encoding 0x8000000000000000 = 0xffffffffffffffff
197+
zigzag_encoding 0x8000000000000001 = 0xfffffffffffffffd
198+
zigzag_encoding 0xc000000000000000 = 0x7fffffffffffffff
199+
zigzag_encoding 0xc000000000000001 = 0x7ffffffffffffffd
200+
zigzag_encoding 0xffffffff80000000 = 0x00000000ffffffff
201+
zigzag_encoding 0xffffffff80000001 = 0x00000000fffffffd
202+
zigzag_encoding 0xfffffffffffffffe = 0x0000000000000003
203+
zigzag_encoding 0xffffffffffffffff = 0x0000000000000001
204+
zigzag_encoding 0x0000000000000000 = 0x0000000000000000
205+
zigzag_encoding 0x0000000000000001 = 0x0000000000000002
206+
zigzag_encoding 0x0000000000000002 = 0x0000000000000004
207+
zigzag_encoding 0x0000000000000003 = 0x0000000000000006
208+
zigzag_encoding 0x000000007ffffffe = 0x00000000fffffffc
209+
zigzag_encoding 0x000000007fffffff = 0x00000000fffffffe
210+
zigzag_encoding 0x3ffffffffffffffe = 0x7ffffffffffffffc
211+
zigzag_encoding 0x3fffffffffffffff = 0x7ffffffffffffffe
212+
zigzag_encoding 0x7ffffffffffffffe = 0xfffffffffffffffc
213+
zigzag_encoding 0x7fffffffffffffff = 0xfffffffffffffffe
214+
zigzag_encoding_unboxed 0x0000000000000000 = 0x0000000000000000
215+
zigzag_encoding_unboxed 0x0000000000000001 = 0x0000000000000002
216+
zigzag_encoding_unboxed 0x4000000000000000 = 0x7fffffffffffffff
217+
zigzag_encoding_unboxed 0x4000000000000001 = 0x7ffffffffffffffd
218+
zigzag_encoding_unboxed 0x7fffffff80000000 = 0x00000000ffffffff
219+
zigzag_encoding_unboxed 0x7fffffff80000001 = 0x00000000fffffffd
220+
zigzag_encoding_unboxed 0x7ffffffffffffffe = 0x0000000000000003
221+
zigzag_encoding_unboxed 0x7fffffffffffffff = 0x0000000000000001
222+
zigzag_encoding_unboxed 0x0000000000000000 = 0x0000000000000000
223+
zigzag_encoding_unboxed 0x0000000000000001 = 0x0000000000000002
224+
zigzag_encoding_unboxed 0x0000000000000002 = 0x0000000000000004
225+
zigzag_encoding_unboxed 0x0000000000000003 = 0x0000000000000006
226+
zigzag_encoding_unboxed 0x000000007ffffffe = 0x00000000fffffffc
227+
zigzag_encoding_unboxed 0x000000007fffffff = 0x00000000fffffffe
228+
zigzag_encoding_unboxed 0x3ffffffffffffffe = 0x7ffffffffffffffc
229+
zigzag_encoding_unboxed 0x3fffffffffffffff = 0x7ffffffffffffffe
230+
zigzag_encoding_unboxed 0x7ffffffffffffffe = 0x0000000000000003
231+
zigzag_encoding_unboxed 0x7fffffffffffffff = 0x0000000000000001
232+
|}]

test/int_types_native_test.ml

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ let proto_file = "int_types_native.proto"
55

66
let test_signed64 (type t) ~(create : Int64.t -> t) (module T : Test_lib.T with type t = t) =
77
Printf.printf "Test %s\n%!" (T.name ());
8-
let values = [-1073741823L; -2L; -1L; 0L; 1L; 2L; 1073741823L] in
8+
let values = [Int64.min_int; Int64.succ Int64.min_int; -1073741823L; -2L; -1L; 0L; 1L; 2L; 1073741823L; Int64.pred Int64.max_int; Int64.max_int] in
99
List.iter
1010
~f:(fun v -> Test_lib.test_encode ~proto_file (module T) (create v))
1111
values
@@ -19,14 +19,14 @@ let test_unsigned64 (type t) ~(create : Int64.t -> t) (module T : Test_lib.T wit
1919

2020
let test_signed32 (type t) ~(create : Int32.t -> t) (module T : Test_lib.T with type t = t) =
2121
Printf.printf "Test %s\n%!" (T.name ());
22-
let values = [-1073741823l; -2l; -1l; 0l; 1l; 2l; 1073741823l] in
22+
let values = [Int32.min_int; Int32.succ Int32.min_int; -1073741823l; -2l; -1l; 0l; 1l; 2l; 1073741823l; Int32.pred Int32.max_int; Int32.max_int] in
2323
List.iter
2424
~f:(fun v -> Test_lib.test_encode ~proto_file (module T) (create v))
2525
values
2626

2727
let test_unsigned32 (type t) ~(create : Int32.t -> t) (module T : Test_lib.T with type t = t) =
2828
Printf.printf "Test %s\n%!" (T.name ());
29-
let values = [0l; 1l; 2l; 2147483647l] in
29+
let values = [0l; 1l; 2l; 1073741823l; Int32.pred Int32.max_int; Int32.max_int] in
3030
List.iter
3131
~f:(fun v -> Test_lib.test_encode ~proto_file (module T) (create v))
3232
values
@@ -37,38 +37,53 @@ let%expect_test _ =
3737
test_signed64 ~create (module T);
3838
[%expect {|
3939
Test .int_types_native.SInt64
40+
i: -9223372036854775808
41+
i: -9223372036854775807
4042
i: -1073741823
4143
i: -2
4244
i: -1
4345
i: 1
4446
i: 2
45-
i: 1073741823 |}]
47+
i: 1073741823
48+
i: 9223372036854775806
49+
i: 9223372036854775807
50+
|}]
4651

4752
let%expect_test _ =
4853
let module T = Int_types_native.SInt32 in
4954
let create i = i in
5055
test_signed32 ~create (module T);
5156
[%expect {|
5257
Test .int_types_native.SInt32
58+
i: -2147483648
59+
i: -2147483647
5360
i: -1073741823
5461
i: -2
5562
i: -1
5663
i: 1
5764
i: 2
58-
i: 1073741823 |}]
65+
i: 1073741823
66+
i: 2147483646
67+
i: 2147483647
68+
|}]
5969

6070
let%expect_test _ =
6171
let module T = Int_types_native.Int64 in
6272
let create i = i in
6373
test_signed64 ~create (module T);
6474
[%expect {|
6575
Test .int_types_native.Int64
76+
i: -9223372036854775808
77+
i: -9223372036854775807
6678
i: -1073741823
6779
i: -2
6880
i: -1
6981
i: 1
7082
i: 2
71-
i: 1073741823 |}]
83+
i: 1073741823
84+
i: 9223372036854775806
85+
i: 9223372036854775807
86+
|}]
7287

7388
let%expect_test _ =
7489
let module T = Int_types_native.Int32 in
@@ -77,12 +92,17 @@ let%expect_test _ =
7792
[%expect
7893
{|
7994
Test .int_types_native.Int32
95+
i: -2147483648
96+
i: -2147483647
8097
i: -1073741823
8198
i: -2
8299
i: -1
83100
i: 1
84101
i: 2
85-
i: 1073741823 |}]
102+
i: 1073741823
103+
i: 2147483646
104+
i: 2147483647
105+
|}]
86106

87107
let%expect_test _ =
88108
let module T = Int_types_native.UInt64 in
@@ -102,4 +122,7 @@ let%expect_test _ =
102122
Test .int_types_native.UInt32
103123
i: 1
104124
i: 2
105-
i: 2147483647 |}]
125+
i: 1073741823
126+
i: 2147483646
127+
i: 2147483647
128+
|}]

0 commit comments

Comments
 (0)