Skip to content

Commit 5f6406f

Browse files
authored
[master] Optimize data cluster format for VHD and QCOW (#7106)
This is a forward-port of #6895 (with the exception of commits later reverted in #7030 and #7022). Changes to VHD code are gated behind a feature flag (with previous behaviour kept by default). XCP-ng has been running the new VHD and QCOW code for a while now with no issues. This PR needs to be merged before the corresponding xs-opam PR (xapi-project/xs-opam#768)
2 parents 457e48a + 5d8a076 commit 5f6406f

14 files changed

Lines changed: 452 additions & 172 deletions

File tree

ocaml/libs/vhd/vhd_format/f.ml

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2903,24 +2903,10 @@ functor
29032903

29042904
let raw ?from (vhd : fd Vhd.t) = raw_common ?from vhd
29052905

2906-
let vhd_blocks_to_json (t : fd Vhd.t) =
2906+
let vhd_blocks_to_json_aux (t : fd Vhd.t) blocks =
29072907
let block_size_sectors_shift =
29082908
t.Vhd.header.Header.block_size_sectors_shift
29092909
in
2910-
let max_table_entries = Vhd.used_max_table_entries t in
2911-
2912-
let include_block = include_block None t in
2913-
2914-
let blocks =
2915-
Seq.init max_table_entries Fun.id
2916-
|> Seq.filter_map (fun i ->
2917-
if include_block i then
2918-
Some (`Int i)
2919-
else
2920-
None
2921-
)
2922-
|> List.of_seq
2923-
in
29242910
let json =
29252911
`Assoc
29262912
[
@@ -2934,6 +2920,52 @@ functor
29342920
let json_string = Yojson.to_string json in
29352921
print_string json_string ; return ()
29362922

2923+
let vhd_blocks_to_json (t : fd Vhd.t) =
2924+
let max_table_entries = Vhd.used_max_table_entries t in
2925+
let blocks =
2926+
Seq.init max_table_entries Fun.id
2927+
|> Seq.filter_map (fun i ->
2928+
if include_block None t i then
2929+
Some (`Int i)
2930+
else
2931+
None
2932+
)
2933+
|> List.of_seq
2934+
in
2935+
vhd_blocks_to_json_aux t blocks
2936+
2937+
let vhd_blocks_to_json_interval (t : fd Vhd.t) =
2938+
let max_table_entries = Vhd.used_max_table_entries t in
2939+
let blocks, last_block =
2940+
Seq.init max_table_entries Fun.id
2941+
|> Seq.fold_left
2942+
(fun (acc, left_block) i ->
2943+
if include_block None t i then
2944+
match left_block with
2945+
| Some _ ->
2946+
(acc, left_block)
2947+
| None ->
2948+
(acc, Some i)
2949+
else
2950+
match left_block with
2951+
| Some x ->
2952+
(`List [`Int x; `Int (i - 1)] :: acc, None)
2953+
| None ->
2954+
(acc, None)
2955+
)
2956+
([], None)
2957+
in
2958+
(* Close off the interval we were tracking we ran off the end of the seq *)
2959+
let blocks =
2960+
match last_block with
2961+
| Some x ->
2962+
`List [`Int x; `Int (max_table_entries - 1)] :: blocks
2963+
| None ->
2964+
blocks
2965+
in
2966+
let blocks = List.rev blocks in
2967+
vhd_blocks_to_json_aux t blocks
2968+
29372969
let vhd_common ?from ?raw ?(emit_batmap = false) (t : fd Vhd.t) =
29382970
let block_size_sectors_shift =
29392971
t.Vhd.header.Header.block_size_sectors_shift
@@ -3173,6 +3205,8 @@ functor
31733205
Vhd_input.vhd_common ?from ~raw vhd
31743206

31753207
let blocks_json = Vhd_input.vhd_blocks_to_json
3208+
3209+
let blocks_json_interval = Vhd_input.vhd_blocks_to_json_interval
31763210
end
31773211

31783212
(* Create a VHD stream from data on t, using `include_block` guide us which blocks have data *)

ocaml/libs/vhd/vhd_format/f.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,8 @@ module From_file : functor (F : S.FILE) -> sig
470470
[from] into [t] *)
471471

472472
val blocks_json : fd Vhd.t -> unit t
473+
474+
val blocks_json_interval : fd Vhd.t -> unit t
473475
end
474476

475477
module Raw_input : sig

ocaml/qcow-stream-tool/qcow_stream_tool.ml

Lines changed: 1 addition & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -4,52 +4,13 @@ module Impl = struct
44
let stream_decode output =
55
Qcow_stream.stream_decode Unix.stdin output ;
66
`Ok ()
7-
8-
let read_headers qcow_path =
9-
let open Lwt.Syntax in
10-
let t =
11-
let* fd = Lwt_unix.openfile qcow_path [Unix.O_RDONLY] 0 in
12-
let* virtual_size, cluster_bits, _, data_cluster_map =
13-
Qcow_stream.start_stream_decode fd
14-
in
15-
(* TODO: List.map becomes tail-recursive in OCaml 5.1, and could be used here instead *)
16-
let clusters =
17-
data_cluster_map
18-
|> Qcow_types.Cluster.Map.to_seq
19-
|> Seq.map (fun (_, virt_address) ->
20-
let ( >> ) = Int64.shift_right_logical in
21-
let address =
22-
Int64.to_int (virt_address >> Int32.to_int cluster_bits)
23-
in
24-
`Int address
25-
)
26-
|> List.of_seq
27-
in
28-
let json =
29-
`Assoc
30-
[
31-
("virtual_size", `Int (Int64.to_int virtual_size))
32-
; ("cluster_bits", `Int (Int32.to_int cluster_bits))
33-
; ("data_clusters", `List clusters)
34-
]
35-
in
36-
let json_string = Yojson.to_string json in
37-
let* () = Lwt_io.print json_string in
38-
let* () = Lwt_io.flush Lwt_io.stdout in
39-
Lwt.return_unit
40-
in
41-
Lwt_main.run t ; `Ok ()
427
end
438

449
module Cli = struct
4510
let output default =
4611
let doc = Printf.sprintf "Path to the output file." in
4712
Arg.(value & pos 0 string default & info [] ~doc)
4813

49-
let input =
50-
let doc = Printf.sprintf "Path to the input file." in
51-
Arg.(required & pos 0 (some string) None & info [] ~doc)
52-
5314
let stream_decode_cmd =
5415
let doc = "decode qcow2 formatted data from stdin and write a raw image" in
5516
let man =
@@ -62,18 +23,7 @@ module Cli = struct
6223
(Cmd.info "stream_decode" ~doc ~man)
6324
Term.(ret (const Impl.stream_decode $ output "test.raw"))
6425

65-
let read_headers_cmd =
66-
let doc =
67-
"Determine allocated clusters by parsing qcow2 file at the provided \
68-
path. Returns JSON like the following: {'virtual_size': X, \
69-
'cluster_bits': Y, 'data_clusters': [1,2,3]}"
70-
in
71-
let man = [`S "DESCRIPTION"; `P doc] in
72-
Cmd.v
73-
(Cmd.info "read_headers" ~doc ~man)
74-
Term.(ret (const Impl.read_headers $ input))
75-
76-
let cmds = [stream_decode_cmd; read_headers_cmd]
26+
let cmds = [stream_decode_cmd]
7727
end
7828

7929
let info =

ocaml/vhd-tool/cli/main.ml

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -385,19 +385,34 @@ let stream_cmd =
385385
, Cmd.info "stream" ~sdocs:_common_options ~doc ~man
386386
)
387387

388+
let vhd_source =
389+
let doc = Printf.sprintf "Path to the VHD file" in
390+
Arg.(required & pos 0 (some file) None & info [] ~doc)
391+
388392
let read_headers_cmd =
389393
let doc =
390394
{|Parse VHD headers and output allocated blocks information in JSON format \
391395
like: {"virtual_size": X, "cluster_bits": X, "data_clusters": [1,2,3]}|}
392396
in
393-
let source =
394-
let doc = Printf.sprintf "Path to the VHD file" in
395-
Arg.(required & pos 0 (some file) None & info [] ~doc)
396-
in
397-
( Term.(ret (const Impl.read_headers $ common_options_t $ source))
397+
( Term.(
398+
ret
399+
(const (Impl.read_headers ~legacy:true) $ common_options_t $ vhd_source)
400+
)
398401
, Cmd.info "read_headers" ~sdocs:_common_options ~doc
399402
)
400403

404+
let read_headers_interval_cmd =
405+
let doc =
406+
{|Parse VHD headers and output allocated blocks intervals information in JSON format \
407+
like: {"virtual_size": X, "cluster_bits": X, "data_clusters": [[1,13],[17,17],[19,272]]|}
408+
in
409+
( Term.(
410+
ret
411+
(const (Impl.read_headers ~legacy:false) $ common_options_t $ vhd_source)
412+
)
413+
, Cmd.info "read_headers_interval" ~sdocs:_common_options ~doc
414+
)
415+
401416
let cmds =
402417
[
403418
info_cmd
@@ -408,6 +423,7 @@ let cmds =
408423
; serve_cmd
409424
; stream_cmd
410425
; read_headers_cmd
426+
; read_headers_interval_cmd
411427
]
412428
|> List.map (fun (t, i) -> Cmd.v i t)
413429

ocaml/vhd-tool/src/impl.ml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,11 +1168,15 @@ let stream_t common args ?(progress = no_progress_bar) () =
11681168
args.StreamCommon.tar_filename_prefix args.StreamCommon.good_ciphersuites
11691169
args.StreamCommon.verify_cert
11701170

1171-
let read_headers common source =
1171+
let read_headers common source ~legacy =
11721172
let path = [Filename.dirname source] in
11731173
let thread =
11741174
retry common 3 (fun () -> Vhd_IO.openchain ~path source false) >>= fun t ->
1175-
Vhd_IO.close t >>= fun () -> Hybrid_input.blocks_json t
1175+
Vhd_IO.close t >>= fun () ->
1176+
if legacy then
1177+
Hybrid_input.blocks_json t
1178+
else
1179+
Hybrid_input.blocks_json_interval t
11761180
in
11771181
Lwt_main.run thread ; `Ok ()
11781182

ocaml/vhd-tool/src/impl.mli

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ val stream :
3636
Common.t -> StreamCommon.t -> [> `Error of bool * string | `Ok of unit]
3737

3838
val read_headers :
39-
Common.t -> string -> [> `Error of bool * string | `Ok of unit]
39+
Common.t -> string -> legacy:bool -> [> `Error of bool * string | `Ok of unit]
4040

4141
val serve :
4242
Common.t

ocaml/xapi/qcow_tool_wrapper.ml

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,43 @@ let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr)
2222
Vhd_qcow_parsing.run_tool qcow_tool progress_cb args ~input_fd:unix_fd
2323

2424
let read_header qcow_path =
25-
let args = ["read_headers"; qcow_path] in
26-
let qcow_tool = !Xapi_globs.qcow_stream_tool in
27-
let pipe_reader, pipe_writer = Unix.pipe ~cloexec:true () in
28-
2925
let progress_cb _ = () in
30-
let (_ : Thread.t) =
26+
let run_in_thread tool args pipe_writer replace_fds =
3127
Thread.create
3228
(fun () ->
3329
Xapi_stdext_pervasives.Pervasiveext.finally
3430
(fun () ->
35-
Vhd_qcow_parsing.run_tool qcow_tool progress_cb args
36-
~output_fd:pipe_writer
31+
Vhd_qcow_parsing.run_tool tool progress_cb args
32+
~output_fd:pipe_writer ~replace_fds
3733
)
3834
(fun () -> Unix.close pipe_writer)
3935
)
4036
()
4137
in
42-
pipe_reader
38+
39+
let map_pipe_reader, map_pipe_writer = Unix.pipe ~cloexec:true () in
40+
let (_ : Thread.t) =
41+
run_in_thread !Xapi_globs.qemu_img
42+
["map"; qcow_path; "--output=json"]
43+
map_pipe_writer []
44+
in
45+
46+
let info_pipe_reader, info_pipe_writer = Unix.pipe ~cloexec:true () in
47+
let (_ : Thread.t) =
48+
run_in_thread !Xapi_globs.qemu_img
49+
["info"; qcow_path; "--output=json"]
50+
info_pipe_writer []
51+
in
52+
53+
(map_pipe_reader, info_pipe_reader)
4354

4455
let parse_header qcow_path =
45-
let pipe_reader = read_header qcow_path in
46-
Vhd_qcow_parsing.parse_header pipe_reader
56+
let pipe, _ = read_header qcow_path in
57+
Vhd_qcow_parsing.parse_header pipe
58+
59+
let parse_header_interval qcow_path =
60+
let pipes = read_header qcow_path in
61+
Vhd_qcow_parsing.parse_header_qemu_img pipes
4762

4863
let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr)
4964
(path : string) (_size : Int64.t) =
@@ -54,7 +69,12 @@ let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr)
5469

5570
(* If VDI is backed by QCOW, parse the header to determine nonzero clusters
5671
to avoid reading all of the raw disk *)
57-
let input_fd = Result.map read_header qcow_path |> Result.to_option in
72+
let input_fds = Result.map read_header qcow_path |> Result.to_option in
73+
74+
(* TODO: If VHD headers are to be consulted as well, qcow2-to-stdout
75+
needs to properly account for cluster_bits. Currently QCOW2 export
76+
from VHD-backed VDIs will just revert to raw, without any
77+
allocation accounting. *)
5878

5979
(* Parse the header of the VDI we are diffing against as well *)
6080
let relative_to_qcow_path =
@@ -64,28 +84,60 @@ let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr)
6484
| None ->
6585
None
6686
in
67-
let diff_fd = Option.map read_header relative_to_qcow_path in
87+
let diff_fds = Option.map read_header relative_to_qcow_path in
88+
89+
let map_fd_string = Uuidx.(to_string (make ())) in
90+
let info_fd_string = Uuidx.(to_string (make ())) in
91+
let diff_map_fd_string = Uuidx.(to_string (make ())) in
92+
let diff_info_fd_string = Uuidx.(to_string (make ())) in
6893

69-
let unique_string = Uuidx.(to_string (make ())) in
7094
let args =
7195
[path]
7296
@ (match relative_to with None -> [] | Some vdi -> ["--diff"; vdi])
7397
@ ( match relative_to_qcow_path with
7498
| None ->
7599
[]
76100
| Some _ ->
77-
["--json-header-diff"; unique_string]
101+
[
102+
"--json-header-diff-map"
103+
; diff_map_fd_string
104+
; "--json-header-diff-info"
105+
; diff_info_fd_string
106+
]
78107
)
79-
@ match qcow_path with Error _ -> [] | Ok _ -> ["--json-header"]
108+
@
109+
match qcow_path with
110+
| Error _ ->
111+
[]
112+
| Ok _ ->
113+
[
114+
"--json-header-map"
115+
; map_fd_string
116+
; "--json-header-info"
117+
; info_fd_string
118+
]
80119
in
81120
let qcow_tool = !Xapi_globs.qcow_to_stdout in
82-
let replace_fds = Option.map (fun fd -> [(unique_string, fd)]) diff_fd in
121+
let replace_fds =
122+
Option.map
123+
(fun (map_fd, info_fd) ->
124+
let rfds = [(map_fd_string, map_fd); (info_fd_string, info_fd)] in
125+
match diff_fds with
126+
| Some (diff_map_fd, diff_info_fd) ->
127+
(diff_map_fd_string, diff_map_fd)
128+
:: (diff_info_fd_string, diff_info_fd)
129+
:: rfds
130+
| None ->
131+
rfds
132+
)
133+
input_fds
134+
in
83135
Xapi_stdext_pervasives.Pervasiveext.finally
84136
(fun () ->
85-
Vhd_qcow_parsing.run_tool qcow_tool progress_cb args ?input_fd
86-
~output_fd:unix_fd ?replace_fds
137+
Vhd_qcow_parsing.run_tool qcow_tool progress_cb args ~output_fd:unix_fd
138+
?replace_fds
87139
)
88140
(fun () ->
89-
Option.iter Unix.close input_fd ;
90-
Option.iter Unix.close diff_fd
141+
Option.iter (fun (x, y) -> Unix.close x ; Unix.close y) input_fds ;
142+
Option.iter (fun (x, y) -> Unix.close x ; Unix.close y) diff_fds
91143
)

ocaml/xapi/qcow_tool_wrapper.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,5 @@ val send :
2525
-> unit
2626

2727
val parse_header : string -> int * int list
28+
29+
val parse_header_interval : string -> int * (int * int) list

0 commit comments

Comments
 (0)