diff --git a/dune-project b/dune-project index b317133e59a..9b158da5255 100644 --- a/dune-project +++ b/dune-project @@ -235,7 +235,6 @@ fmt logs mtime - xapi-backtrace (xapi-stdext-pervasives (= :version)))) (package @@ -322,7 +321,6 @@ uuidm uutf x509 - xapi-backtrace xapi-log xapi-types xapi-stdext-pervasives @@ -451,7 +449,6 @@ uutf uuidm x509 - xapi-backtrace (xapi-client (= :version)) (xapi-cli-protocol @@ -764,7 +761,6 @@ uri (uuid (= :version)) - xapi-backtrace (xapi-idl (= :version)) (xapi-log @@ -795,7 +791,6 @@ rpclib (uuid (= :version)) - xapi-backtrace (xapi-log (= :version)) (xapi-stdext-pervasives @@ -842,7 +837,7 @@ (>= 4.08)) logs (odoc :with-doc) - xapi-backtrace)) + )) (package (name xapi-stdext-std) @@ -902,7 +897,6 @@ (>= 0.21.2) :with-test)) (odoc :with-doc) - xapi-backtrace unix-errno (xapi-stdext-pervasives (= :version)) diff --git a/ocaml/database/dune b/ocaml/database/dune index 74b6c512d66..cb81fc54b2c 100644 --- a/ocaml/database/dune +++ b/ocaml/database/dune @@ -53,7 +53,7 @@ httpsvr unix uuid - xapi-backtrace + backtrace xapi-datamodel xapi-log (re_export xapi-schema) diff --git a/ocaml/forkexecd/lib/dune b/ocaml/forkexecd/lib/dune index 90db94c2a53..42cedda8282 100644 --- a/ocaml/forkexecd/lib/dune +++ b/ocaml/forkexecd/lib/dune @@ -12,7 +12,7 @@ rpclib.xml unix uuid - xapi-backtrace + backtrace xapi-log xapi-stdext-pervasives xapi-stdext-unix diff --git a/ocaml/forkexecd/lib/fe_stubs.c b/ocaml/forkexecd/lib/fe_stubs.c index 89e14101f13..9fcea2bef52 100644 --- a/ocaml/forkexecd/lib/fe_stubs.c +++ b/ocaml/forkexecd/lib/fe_stubs.c @@ -20,6 +20,8 @@ #include #include +#include +#include #include #include #include @@ -213,7 +215,7 @@ caml_safe_exec_with_helper(value args, value environment) } // potentially slow section, release Ocaml engine - caml_enter_blocking_section(); + caml_release_runtime_system(); safe_exec_result res; int err = safe_exec_with_helper(&res, c_args, c_envs); @@ -221,7 +223,7 @@ caml_safe_exec_with_helper(value args, value environment) free(c_envs); free(c_args); - caml_leave_blocking_section(); + caml_acquire_runtime_system(); // error, notify with an exception if (err != 0) @@ -395,7 +397,7 @@ caml_pidwaiter_waitpid(value timeout_value, value pid_value) double timeout = timeout_value == Val_none ? 0 : Double_val(Some_val(timeout_value)); pid_t pid = Int_val(pid_value); - caml_enter_blocking_section(); + caml_release_runtime_system(); bool timed_out = false; int err = 0; @@ -407,7 +409,7 @@ caml_pidwaiter_waitpid(value timeout_value, value pid_value) timed_out = true; } - caml_leave_blocking_section(); + caml_acquire_runtime_system(); if (err) unix_error(err, "waitpid", Nothing); diff --git a/ocaml/gencert/dune b/ocaml/gencert/dune index 19ef8f1b33a..470b16a30e8 100644 --- a/ocaml/gencert/dune +++ b/ocaml/gencert/dune @@ -18,7 +18,7 @@ rresult unix x509 - xapi-backtrace + backtrace xapi-consts xapi-log xapi-inventory diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 427e18db7f0..076c0a591dd 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -234,7 +234,7 @@ let prototyped_of_message = function | "PCI", "disable_dom0_access" -> Some "24.14.0" | "message", "destroy_all" -> - Some "26.5.0-next" + Some "26.6.0" | "message", "destroy_many" -> Some "22.19.0" | "VTPM", "set_contents" -> diff --git a/ocaml/idl/dune b/ocaml/idl/dune index ac591ae1e0f..57958ea6f51 100644 --- a/ocaml/idl/dune +++ b/ocaml/idl/dune @@ -13,7 +13,7 @@ sexplib0 sexpr threads - xapi-backtrace + backtrace xapi-consts xapi-schema clock diff --git a/ocaml/idl/ocaml_backend/dune b/ocaml/idl/ocaml_backend/dune index 6304d72729c..f3273fe5c6d 100644 --- a/ocaml/idl/ocaml_backend/dune +++ b/ocaml/idl/ocaml_backend/dune @@ -4,6 +4,8 @@ (libraries astring cmdliner + fmt + ptime.clock uuidm xapi-consts xapi-datamodel diff --git a/ocaml/idl/ocaml_backend/gen_client.ml b/ocaml/idl/ocaml_backend/gen_client.ml index 9ed3a703659..cb61df5795c 100644 --- a/ocaml/idl/ocaml_backend/gen_client.ml +++ b/ocaml/idl/ocaml_backend/gen_client.ml @@ -319,6 +319,12 @@ let gen_module api : O.Module.t = ; " | Rpc.Enum ((Rpc.String code) :: args) -> return (server_failure \ code (List.map Rpc.string_of_rpc args))" ; " | rpc -> failwith (\"Client.rpc: \" ^ Rpc.to_string rpc)" + ; "type client = {rpc: Rpc.call -> Rpc.response; session_id: ref_session}" + ; "type 'a api = rpc:(Rpc.call -> Rpc.response) -> session_id:ref_session \ + -> 'a" + ; "" + ; "let call {rpc; session_id} f = f ~rpc ~session_id" + ; "" ] in let postamble = diff --git a/ocaml/idl/ocaml_backend/gen_rbac.ml b/ocaml/idl/ocaml_backend/gen_rbac.ml index 276f285fb3e..5ce8c0abb90 100644 --- a/ocaml/idl/ocaml_backend/gen_rbac.ml +++ b/ocaml/idl/ocaml_backend/gen_rbac.ml @@ -34,10 +34,13 @@ let internal_role_local_root = "_local_root_" (* the output of this function is used as input by the automatic tests *) let writer_csv static_permissions_roles = - Printf.sprintf "%s,PERMISSION/ROLE,%s\n" - (let t = Debug.gettimestring () in - String.sub t 0 (String.length t - 1) - ) + let now = + let now = Ptime_clock.now () in + let str = Fmt.str "%a" Ptime.(pp_rfc3339 ~frac_s:3 ~tz_offset_s:0 ()) now in + (* remove separators between Year, Month, and Day; to keep old logging format *) + Astring.String.filter (function '-' -> false | _ -> true) str + in + Printf.sprintf "%s,PERMISSION/ROLE,%s\n" now (* role titles are ordered by roles in roles_all *) (List.fold_left (fun rr r -> rr ^ r ^ ",") "" Datamodel_roles.roles_all) ^ List.fold_left diff --git a/ocaml/libs/backtrace/.git-blame-ignore-revs b/ocaml/libs/backtrace/.git-blame-ignore-revs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/libs/backtrace/.gitignore b/ocaml/libs/backtrace/.gitignore new file mode 100644 index 00000000000..2aaa9aceff5 --- /dev/null +++ b/ocaml/libs/backtrace/.gitignore @@ -0,0 +1,3 @@ +_build +*.install +.merlin diff --git a/ocaml/libs/backtrace/CHANGES.md b/ocaml/libs/backtrace/CHANGES.md new file mode 100644 index 00000000000..9b42cf866f0 --- /dev/null +++ b/ocaml/libs/backtrace/CHANGES.md @@ -0,0 +1,26 @@ +## v0.8 (13-Mar-2026) +* Set a license +* Provide a new with_backtraces that prevents printing invalid traces +* Fix losing backtraces when reraising +* Add regression tests + +## v0.7 (18-Sep-2018) +* Remove dependency on full sexplib +* Simplify jbuild, quiet warnings, move to dune and update opam dependencies +* jbuild: remove ppx_deriving_rpc from libraries +* Move to dune and update opam dependencies + +## v0.6 (16-May-2018) +* Add support for ppx_sexp_conv >= v0.11.0 + +## v0.5 (04-Aug-2017) +* port to jbuilder + +## v0.3 (21-Aug-2015) +* correct ordering +* add rpc to opam +* add doc gen to _oasis + +## v0.2 (20-Nov-2014) +* store backtraces as lists of records rather than strings +* change the API for "importing" backtraces from other languages diff --git a/ocaml/libs/backtrace/LICENSE b/ocaml/libs/backtrace/LICENSE new file mode 100644 index 00000000000..1b1ce97cb5c --- /dev/null +++ b/ocaml/libs/backtrace/LICENSE @@ -0,0 +1,521 @@ +This repository is distributed under the terms of the GNU Lesser General +Public License version 2.1 (included below). + +As a special exception to the GNU Lesser General Public License, you +may link, statically or dynamically, a "work that uses the Library" +with a publicly distributed version of the Library to produce an +executable file containing portions of the Library, and distribute +that executable file under terms of your choice, without any of the +additional requirements listed in clause 6 of the GNU Lesser General +Public License. By "a publicly distributed version of the Library", +we mean either the unmodified Library as distributed, or a +modified version of the Library that is distributed under the +conditions defined in clause 3 of the GNU Library General Public +License. This exception does not however invalidate any other reasons +why the executable file might be covered by the GNU Lesser General +Public License. + +------------ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/ocaml/libs/backtrace/README.md b/ocaml/libs/backtrace/README.md new file mode 100644 index 00000000000..12bd81ad09a --- /dev/null +++ b/ocaml/libs/backtrace/README.md @@ -0,0 +1,7 @@ +backtrace +========= + +[![Build Status](https://travis-ci.org/xapi-project/backtrace.svg?branch=master)](https://travis-ci.org/xapi-project/backtrace) +[![API reference](https://img.shields.io/badge/docs-API_reference-blue.svg)](http://xapi-project.github.io/backtrace) + +Helper functions to preserve and transport exception backtraces diff --git a/ocaml/libs/backtrace/lib/backtrace.ml b/ocaml/libs/backtrace/lib/backtrace.ml new file mode 100644 index 00000000000..7f17bbbd4b8 --- /dev/null +++ b/ocaml/libs/backtrace/lib/backtrace.ml @@ -0,0 +1,295 @@ +(* + * Copyright (C) 2006-2014 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +open Sexplib0.Sexp_conv + +let my_name = ref (Filename.basename Sys.argv.(0)) + +let set_my_name x = my_name := x + +module Mutex = struct + include Mutex + + (** execute the function f with the mutex hold *) + let execute lock f = + Mutex.lock lock ; + let r = + begin try f () with exn -> Mutex.unlock lock ; raise exn + end + in + Mutex.unlock lock ; r +end + +type frame = {process: string; filename: string; line: int} [@@deriving sexp] + +type t = frame list [@@deriving sexp] + +let empty = [] + +let to_string_hum xs = + let xs' = List.length xs in + let results = Buffer.create 10 in + let rec loop first_line i = function + | [] -> + Buffer.contents results + | x :: xs -> + Buffer.add_string results + (Printf.sprintf "%d/%d %s %s file %s, line %d" i xs' x.process + ( if first_line then + "Raised at" + else + "Called from" + ) + x.filename x.line + ) ; + Buffer.add_string results "\n" ; + loop false (i + 1) xs + in + match xs with + | [] -> + Printf.sprintf "%s: Thread %d has no backtrace table" !my_name + Thread.(id (self ())) + | _ -> + loop true 1 xs + +type table = { + backtraces: t array + ; exn_to_backtrace: exn Weak.t + ; mutable producer: int (* free running counter *) + ; m: Mutex.t +} + +(* Increasing this makes 'find_all' slower and increases the amount of + memory needed. Since we have a table per thread a small number should + be enough. *) +let max_backtraces = 100 + +let frame_of_string process x = + try + begin match String.split_on_char '"' x with + | [_; filename; rest] -> begin + match String.split_on_char ',' rest with + | [_; line_n; _] -> begin + match String.split_on_char ' ' line_n with + | _ :: _ :: n :: _ -> + {process; filename; line= int_of_string n} + | _ -> + failwith (Printf.sprintf "Failed to parse line: [%s]" line_n) + end + | _ -> + failwith (Printf.sprintf "Failed to parse fragment: [%s]" filename) + end + | _ -> + failwith (Printf.sprintf "Failed to parse fragment: [%s]" x) + end + with e -> {process; filename= "(" ^ Printexc.to_string e ^ ")"; line= 0} + +let get_backtrace_401 () = + Printexc.get_backtrace () + |> String.split_on_char '\n' + |> List.filter (fun x -> x <> "") + |> List.map (frame_of_string !my_name) + +let make () = + let backtraces = Array.make max_backtraces [] in + let exn_to_backtrace = Weak.create max_backtraces in + let producer = 0 in + (* free running *) + let m = Mutex.create () in + {backtraces; exn_to_backtrace; producer; m} + +let add t exn bt = + Mutex.execute t.m (fun () -> + let slot = t.producer mod max_backtraces in + t.producer <- t.producer + 1 ; + Weak.set t.exn_to_backtrace slot (Some exn) ; + t.backtraces.(slot) <- bt + ) + +let is_important t exn = + let bt = get_backtrace_401 () in + (* Deliberately clear the backtrace buffer *) + (try raise Not_found with Not_found -> ()) ; + add t exn bt + +(* fold over the slots matching exn *) +let fold t exn f initial = + let rec loop acc from = + if from < 0 || t.producer - from > max_backtraces then + acc + else + let slot = from mod max_backtraces in + match Weak.get t.exn_to_backtrace slot with + | Some exn' when exn' == exn -> + loop (f acc slot) (from - 1) + | _ -> + loop acc (from - 1) + in + loop initial (t.producer - 1) + +let remove_dups xs = + List.fold_left + (fun (last, acc) item -> + ( item + , if last = item then + acc + else + item :: acc + ) + ) + ([], []) (List.rev xs) + |> snd + +(* + |> List.rev +*) +let get t exn = + fold t exn (fun acc slot -> t.backtraces.(slot) :: acc) [] + |> remove_dups + |> List.concat + +let remove t exn = + fold t exn + (fun acc slot -> + let bt = t.backtraces.(slot) in + Weak.set t.exn_to_backtrace slot None ; + t.backtraces.(slot) <- [] ; + bt :: acc + ) + [] + |> remove_dups + |> List.concat + +module IntMap = Map.Make (Int) + +module ThreadLocalTable = struct + (* The map values behave like stacks here, with shadowing as in Hashtbl. + A Hashtbl is not used here, in order to avoid taking the lock in `find`. *) + type 'a t = {mutable tbl: 'a list IntMap.t; m: Mutex.t} + + let make () = + let tbl = IntMap.empty in + let m = Mutex.create () in + {tbl; m} + + let add t v = + let id = Thread.(id (self ())) in + Mutex.execute t.m (fun () -> + t.tbl <- + IntMap.update id + (function Some v' -> Some (v :: v') | None -> Some [v]) + t.tbl + ) + + let remove t = + let id = Thread.(id (self ())) in + Mutex.execute t.m (fun () -> + t.tbl <- + IntMap.update id + (function + | Some [_] -> + None + | Some (_hd :: tl) -> + Some tl + | Some [] | None -> + None + ) + t.tbl + ) + + let find t = + let id = Thread.(id (self ())) in + IntMap.find_opt id t.tbl + |> Option.fold ~none:None ~some:(function v :: _ -> Some v | [] -> None) +end + +let per_thread_backtraces = ThreadLocalTable.make () + +let ( let@ ) f x = f x + +let try_result f = try Ok (f ()) with exn -> Error exn + +let with_backtraces_common f with_table = + let tbl = + let tbl = + match ThreadLocalTable.find per_thread_backtraces with + | Some tbl -> + tbl + | None -> + make () + in + (* If we nest these functions we add multiple bindings + to the same mutable table which is ok *) + ThreadLocalTable.add per_thread_backtraces tbl ; + tbl + in + let finally () = ThreadLocalTable.remove per_thread_backtraces in + Fun.protect ~finally (fun () -> with_table tbl (try_result f)) + +module V1 = struct + let with_backtraces f = + let with_table tbl = function + | Ok ok -> + `Ok ok + | Error e -> + `Error (e, get tbl e) + in + with_backtraces_common f with_table +end + +module V2 = struct + let with_backtraces ~finally f = + let with_table tbl result = + result |> Result.map_error (function e -> (e, get tbl e)) |> finally + in + with_backtraces_common f with_table +end + +let with_backtraces = V1.with_backtraces + +let is_important exn = + ThreadLocalTable.find per_thread_backtraces + |> Option.iter (fun tbl -> is_important tbl exn) + +let add exn bt = + ThreadLocalTable.find per_thread_backtraces + |> Option.iter (fun tbl -> add tbl exn bt) + +let remove exn = + ThreadLocalTable.find per_thread_backtraces + |> Option.fold ~some:(fun tbl -> remove tbl exn) ~none:empty + +let get exn = + ThreadLocalTable.find per_thread_backtraces + |> Option.fold ~some:(fun tbl -> get tbl exn) ~none:empty + +let reraise old newexn = + is_important old ; + add newexn (remove old) ; + raise newexn + +module Interop = struct + (* This matches xapi.py:exception *) + type error = { + error: string + ; (* Python json.dumps and rpclib are not very friendly *) + files: string list + ; lines: int list + } + [@@deriving rpc] + + let of_json source_name txt = + txt |> Jsonrpc.of_string |> error_of_rpc |> fun e -> + List.combine e.files e.lines + |> List.map (fun (filename, line) -> {process= source_name; filename; line}) +end diff --git a/ocaml/libs/backtrace/lib/backtrace.mli b/ocaml/libs/backtrace/lib/backtrace.mli new file mode 100644 index 00000000000..73d7f2bbf6e --- /dev/null +++ b/ocaml/libs/backtrace/lib/backtrace.mli @@ -0,0 +1,91 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** A backtrace from a particular thread. *) +type t [@@deriving sexp] + +val empty : t +(** An empty backtrace *) + +val to_string_hum : t -> string +(** Produce a human-readable printable/loggable version of the + backtrace. *) + +(** {2 Handling exceptions without losing backtraces} + Whenever a function raises an exception, the backtrace buffer is + emptied. Therefore when we are handling an exception, we must + stash away a copy of the backtrace buffer if there is any risk + of us raising another (or even the same) exception) *) + +module V1 : sig + val with_backtraces : (unit -> 'a) -> [`Ok of 'a | `Error of exn * t] + [@@deprecated "V2.with_backtraces"] end + +module V2 : sig + val with_backtraces : + finally:(('a, exn * t) result -> 'a) -> (unit -> 'a) -> 'a + (** [with_backtraces thread finally] Allows backtraces to be recorded within + [thread]. [finally] is executed whenever [thread] finishes, this allows + users to use the stacktrace before it's dropped from the cache, for + example, to log it. *) +end + +val with_backtraces : (unit -> 'a) -> [`Ok of 'a | `Error of exn * t] +(** Allow backtraces to be recorded for this thread. All new threads + must be wrapped in this for the backtrace tracking to work. + It is acceptable to nest these wrappers; it will not affect the + backtrace recording behaviour. Please change to [V2.with_backtraces] *) + +val is_important : exn -> unit +(** Declare that the backtrace is important for debugging and should be + permanently associated with the exception. Call this function in + an exception handler where you might need to re-raise the same + exception at the end after performing some cleanup, which could + clear the current backtrace buffer.*) + +val get : exn -> t +(** Get a copy of the backtrace associated with [exn] *) + +val add : exn -> t -> unit +(** Associate additional backtrace with an exception. This allows + you to combine a backtrace from another process with your current + backtrace. *) + +val reraise : exn -> exn -> 'a +(** [reraise old new] associates the backtrace of [old] with [new] + and throws [new]. Use this if you need to 'launder' an exception + e.g. you may want to catch Not_found and throw a more descriptive + exception instead without losing the backtrace. *) + +val remove : exn -> t +(** Get a backtrace associated with [exn] and remove it from the tables. + Use this when you want to print/log or otherwise record the final + backtrace. *) + +(** {2 Administrivia} *) + +val set_my_name : string -> unit +(** Every backtrace line will include a name for this process. By default it + will be the executable name, but it could also include the process ID + and host. *) + +(** {2 Interop with other languages} + This allows backtraces from other languages (e.g. python) to be converted + into OCaml-style backtraces. *) + +module Interop : sig + val of_json : string -> string -> t + (** [of_json source_name json]: unmarshals a json-format backtrace from + [source_name] *) +end diff --git a/ocaml/libs/backtrace/lib/dune b/ocaml/libs/backtrace/lib/dune new file mode 100644 index 00000000000..53c1b13ecda --- /dev/null +++ b/ocaml/libs/backtrace/lib/dune @@ -0,0 +1,7 @@ +(library + (name backtrace) + (package xapi-log) + (flags (:standard -w -39-32)) + (libraries + rpclib.core rpclib.json threads.posix) + (preprocess (pps ppx_deriving_rpc ppx_sexp_conv))) diff --git a/ocaml/libs/backtrace/test/dune b/ocaml/libs/backtrace/test/dune new file mode 100644 index 00000000000..085a5c65253 --- /dev/null +++ b/ocaml/libs/backtrace/test/dune @@ -0,0 +1,13 @@ +(library + (name backtrace_test_lib) + (modules log) + (libraries backtrace) + ) + +(executable + (name raiser) + (modules raiser) + (libraries backtrace_test_lib backtrace) + ) + +(cram (deps ./raiser.exe)) diff --git a/ocaml/libs/backtrace/test/log.ml b/ocaml/libs/backtrace/test/log.ml new file mode 100644 index 00000000000..c773272b6dd --- /dev/null +++ b/ocaml/libs/backtrace/test/log.ml @@ -0,0 +1,59 @@ +let ( let@ ) f x = f x + +let output_log s = Printf.printf "%s\n%!" s + +let log_backtrace_exn exn bt = + (* We already got the backtrace in the `bt` argument when called from + with_thread_associated. Log that, and remove `exn` from the backtraces + table. If with_backtraces was not nested then looking at `bt` is the only + way to get a proper backtrace, otherwise exiting from `with_backtraces` + would've removed the backtrace from the thread-local backtraces table, and + we'd always just log a message complaining about with_backtraces not being + called, which is not true because it was. + *) + let bt' = Backtrace.remove exn in + (* bt could be empty, but bt' would contain a non-empty warning, so compare 'bt' here *) + let bt = + if bt = Backtrace.empty then + bt' + else + bt + in + let all = String.split_on_char '\n' Backtrace.(to_string_hum bt) in + output_log (Printf.sprintf "Raised %s" (Printexc.to_string exn)) ; + List.iter output_log all + +let with_thread_associated_old desc f x = + let result = + let@ () = + begin[@alert "-deprecated"] + Backtrace.V1.with_backtraces + end + in + try f x with e -> Backtrace.is_important e ; raise e + in + match result with + | `Ok result -> + result + | `Error (exn, bt) -> + output_log + (Printf.sprintf "%s failed with exception %s" desc + (Printexc.to_string exn) + ) ; + log_backtrace_exn exn bt ; + raise exn + +let with_thread_associated desc f x = + let print_backtrace = function + | Ok result -> + result + | Error (exn, bt) -> + output_log + (Printf.sprintf "%s failed with exception %s" desc + (Printexc.to_string exn) + ) ; + log_backtrace_exn exn bt ; + raise exn + in + let@ () = Backtrace.V2.with_backtraces ~finally:print_backtrace in + try f x with e -> Backtrace.is_important e ; raise e diff --git a/ocaml/libs/backtrace/test/log.mli b/ocaml/libs/backtrace/test/log.mli new file mode 100644 index 00000000000..c089be8a2a5 --- /dev/null +++ b/ocaml/libs/backtrace/test/log.mli @@ -0,0 +1,4 @@ +val with_thread_associated : string -> ('a -> 'b) -> 'a -> 'b + +val with_thread_associated_old : string -> ('a -> 'b) -> 'a -> 'b +(** Uses V1.with_backtrace *) diff --git a/ocaml/libs/backtrace/test/raiser.ml b/ocaml/libs/backtrace/test/raiser.ml new file mode 100644 index 00000000000..2d5c954d5f2 --- /dev/null +++ b/ocaml/libs/backtrace/test/raiser.ml @@ -0,0 +1,51 @@ +let foo () : unit = raise (Failure "foo") + +let bar () = + try foo () with Failure _ as e -> Backtrace.reraise e (Failure "bar") + +let baz () = try foo () with exn -> raise exn + +let no_backtraces = ref false + +let test_no_backtraces () = + Printexc.record_backtrace false ; + try Backtrace_test_lib.Log.with_thread_associated "Backtrace lab" foo () + with _ -> () + +let reraise = ref false + +let test_reraise () = + Printexc.record_backtrace true ; + try Backtrace_test_lib.Log.with_thread_associated "Backtrace lab" bar () + with _ -> () + +let v1_with_backtrace = ref false + +let test_v1_with_backtrace () = + Printexc.record_backtrace false ; + try Backtrace_test_lib.Log.with_thread_associated_old "Backtrace lab" foo () + with _ -> () + +let raise_again = ref false + +let test_raise_again () = + Printexc.record_backtrace true ; + try Backtrace_test_lib.Log.with_thread_associated "Backtrace lab" baz () + with _ -> () + +let usage = Printf.sprintf "%s" Sys.argv.(0) + +let speclist = + [ + ("-no-backtraces", Arg.Set no_backtraces, "Test no-backtraces") + ; ("-reraise", Arg.Set reraise, "Test reraise") + ; ("-v1-with-backtrace", Arg.Set v1_with_backtrace, "Test v1-with-backtrace") + ; ("-raise-again", Arg.Set raise_again, "Test raise-again") + ] + +let () = + Arg.parse speclist (Fun.const ()) usage ; + if !no_backtraces then test_no_backtraces () ; + if !reraise then test_reraise () ; + if !v1_with_backtrace then test_v1_with_backtrace () ; + if !raise_again then test_raise_again () diff --git a/ocaml/libs/backtrace/test/raiser.mli b/ocaml/libs/backtrace/test/raiser.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/libs/backtrace/test/reraise.t b/ocaml/libs/backtrace/test/reraise.t new file mode 100644 index 00000000000..128ba914e7c --- /dev/null +++ b/ocaml/libs/backtrace/test/reraise.t @@ -0,0 +1,27 @@ + $ ./raiser.exe -no-backtraces + Backtrace lab failed with exception Failure("foo") + Raised Failure("foo") + raiser.exe: Thread 0 has no backtrace table + + $ ./raiser.exe -reraise + Backtrace lab failed with exception Failure("bar") + Raised Failure("bar") + 1/4 raiser.exe Raised at file ocaml/libs/backtrace/test/raiser.ml, line 1 + 2/4 raiser.exe Called from file ocaml/libs/backtrace/test/raiser.ml, line 4 + 3/4 raiser.exe Called from file ocaml/libs/backtrace/lib/backtrace.ml, line 279 + 4/4 raiser.exe Called from file ocaml/libs/backtrace/test/log.ml, line 59 + + + $ ./raiser.exe -v1-with-backtrace + Backtrace lab failed with exception Failure("foo") + Raised Failure("foo") + raiser.exe: Thread 0 has no backtrace table + + $ ./raiser.exe -raise-again + Backtrace lab failed with exception Failure("foo") + Raised Failure("foo") + 1/4 raiser.exe Raised at file ocaml/libs/backtrace/test/raiser.ml, line 1 + 2/4 raiser.exe Called from file ocaml/libs/backtrace/test/raiser.ml, line 6 + 3/4 raiser.exe Called from file ocaml/libs/backtrace/test/raiser.ml, line 6 + 4/4 raiser.exe Called from file ocaml/libs/backtrace/test/log.ml, line 59 + diff --git a/ocaml/libs/http-lib/dune b/ocaml/libs/http-lib/dune index 4e8d255b6bd..f05fdba3699 100644 --- a/ocaml/libs/http-lib/dune +++ b/ocaml/libs/http-lib/dune @@ -34,7 +34,7 @@ unix uuid uri - xapi-backtrace + backtrace xapi-consts.xapi_version xapi-idl.updates xapi-log @@ -60,7 +60,7 @@ tracing_propagator unix uri - xapi-backtrace + backtrace xapi-log xapi-stdext-pervasives xapi-stdext-threads @@ -84,7 +84,7 @@ stunnel threads.posix unix - xapi-backtrace + backtrace xapi-log xapi-stdext-pervasives xapi-stdext-unix)) diff --git a/ocaml/libs/log/debug.ml b/ocaml/libs/log/debug.ml index 17ea5fc66f9..a975dc6e71b 100644 --- a/ocaml/libs/log/debug.ml +++ b/ocaml/libs/log/debug.ml @@ -74,19 +74,22 @@ let tasks : task ThreadLocalTable.t = ThreadLocalTable.make () let names : string ThreadLocalTable.t = ThreadLocalTable.make () let gettimestring () = - let time = Unix.gettimeofday () in - let tm = Unix.gmtime time in - let msec = time -. floor time in - Printf.sprintf "%d%.2d%.2dT%.2d:%.2d:%.2d.%.3dZ|" (1900 + tm.Unix.tm_year) - (tm.Unix.tm_mon + 1) tm.Unix.tm_mday tm.Unix.tm_hour tm.Unix.tm_min - tm.Unix.tm_sec - (int_of_float (1000.0 *. msec)) + let now = Ptime_clock.now () in + Fmt.str "%a|" Ptime.(pp_rfc3339 ~frac_s:3 ~tz_offset_s:0 ()) now (** [escape str] efficiently escapes non-printable characters and in addition the backslash character. The function is efficient in the sense that it will allocate a new string only when necessary *) let escape = Astring.String.Ascii.escape +let remote_context = Ambient_context_thread_local.Thread_local.create () + +let set_remote_context = function + | None -> + Ambient_context_thread_local.Thread_local.remove remote_context + | Some context -> + Ambient_context_thread_local.Thread_local.set remote_context context + let format include_time brand priority message = let id = get_thread_id () in let task, name = @@ -102,13 +105,17 @@ let format include_time brand priority message = | Some {desc; client= Some client} -> (desc, Printf.sprintf "%s->%s" client name) in - Printf.sprintf "[%s%5s||%d %s|%s|%s] %s" + let remote_context = + Ambient_context_thread_local.Thread_local.get remote_context + |> Option.value ~default:"" + in + Printf.sprintf "[%s%5s|%s|%d %s|%s|%s] %s" ( if include_time then gettimestring () else "" ) - priority id name task brand message + priority remote_context id name task brand message let print_debug = ref false @@ -140,6 +147,10 @@ let facility = ref Syslog.Daemon let set_facility f = facility := f +let set_backtrace_name this_host_name = + let name = Printf.sprintf "%s @ %s" Sys.argv.(0) this_host_name in + Backtrace.set_my_name name + let get_facility () = !facility let output_log brand level priority s = @@ -216,20 +227,14 @@ let init_logs () = calling [output_log] too often. *) Logs.set_level (Some Logs.Warning) -let rec split_c c str = - try - let i = String.index str c in - String.sub str 0 i - :: split_c c (String.sub str (i + 1) (String.length str - i - 1)) - with Not_found -> [str] - let log_backtrace_exn ?(level = Syslog.Err) ?(msg = "error") exn bt = - (* We already got the backtrace in the `bt` argument when called from with_thread_associated. - Log that, and remove `exn` from the backtraces table. - If with_backtraces was not nested then looking at `bt` is the only way to get - a proper backtrace, otherwise exiting from `with_backtraces` would've removed the backtrace - from the thread-local backtraces table, and we'd always just log a message complaining about - with_backtraces not being called, which is not true because it was. + (* We already got the backtrace in the `bt` argument when called from + with_thread_associated. Log that, and remove `exn` from the backtraces + table. If with_backtraces was not nested then looking at `bt` is the only + way to get a proper backtrace, otherwise exiting from `with_backtraces` + would've removed the backtrace from the thread-local backtraces table, and + we'd always just log a message complaining about with_backtraces not being + called, which is not true because it was. *) let bt' = Backtrace.remove exn in (* bt could be empty, but bt' would contain a non-empty warning, so compare 'bt' here *) @@ -239,7 +244,7 @@ let log_backtrace_exn ?(level = Syslog.Err) ?(msg = "error") exn bt = else bt in - let all = split_c '\n' Backtrace.(to_string_hum bt) in + let all = String.split_on_char '\n' Backtrace.(to_string_hum bt) in (* Write to the log line at a time *) output_log "backtrace" level msg (Printf.sprintf "Raised %s" (Printexc.to_string exn)) ; diff --git a/ocaml/libs/log/debug.mli b/ocaml/libs/log/debug.mli index cc63ed9a7cb..0c5178b293c 100644 --- a/ocaml/libs/log/debug.mli +++ b/ocaml/libs/log/debug.mli @@ -29,14 +29,17 @@ val with_thread_associated : val with_thread_named : string -> ('a -> 'b) -> 'a -> 'b (** Do an action with a name associated with the current thread *) -module type BRAND = sig val name : string end +val set_remote_context : string option -> unit +(** [set_remote_context context] sets the remote context, will be logged as the 2nd field *) -val gettimestring : unit -> string -(** The current time of day in a format suitable for logging *) +module type BRAND = sig val name : string end val set_facility : Syslog.facility -> unit (** Set the syslog facility that will be used by this program. *) +val set_backtrace_name : string -> unit +(** Set the hostname for backtraces *) + val disable : ?level:Syslog.level -> string -> unit (** [disable brand] Suppress all log output from the given [brand]. Specifying a [level] disables * only this log level, otherwise all levels for the given diff --git a/ocaml/libs/log/dune b/ocaml/libs/log/dune index 8b4c2a1d512..bff990fbf23 100644 --- a/ocaml/libs/log/dune +++ b/ocaml/libs/log/dune @@ -5,12 +5,15 @@ (language c) (names syslog_stubs)) (libraries + ambient-context.thread_local astring fmt mtime logs + ptime + ptime.clock threads.posix - xapi-backtrace + backtrace unix ) (wrapped false) diff --git a/ocaml/libs/log/test/dune b/ocaml/libs/log/test/dune index 75fbbad7557..0da565b451b 100644 --- a/ocaml/libs/log/test/dune +++ b/ocaml/libs/log/test/dune @@ -1,6 +1,6 @@ (executable (name log_test) - (libraries log threads.posix xapi-backtrace)) + (libraries log threads.posix backtrace)) (cram (package xapi-log) diff --git a/ocaml/libs/open-uri/dune b/ocaml/libs/open-uri/dune index 4bf141d3a5b..867595ebff7 100644 --- a/ocaml/libs/open-uri/dune +++ b/ocaml/libs/open-uri/dune @@ -7,7 +7,7 @@ stunnel unix uri - xapi-backtrace + backtrace xapi-consts xapi-log xapi-stdext-pervasives diff --git a/ocaml/libs/resources/dune b/ocaml/libs/resources/dune index 358d7c799c5..6ac8c3940bb 100644 --- a/ocaml/libs/resources/dune +++ b/ocaml/libs/resources/dune @@ -3,7 +3,7 @@ (public_name safe-resources) (libraries logs - xapi-backtrace + backtrace fmt threads.posix unix diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index d759388fd81..a634ec23486 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -336,6 +336,9 @@ module Span = struct let get_trace_context t = t.context |> SpanContext.context_of_span_context + let[@inline always] set_trace_context trace_context = + trace_context |> TraceContext.traceparent_of |> Debug.set_remote_context + let start ?(attributes = Attributes.empty) ?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () = let trace_id, extra_context, depth = @@ -348,6 +351,7 @@ module Span = struct , TraceContext.baggage_depth_of span_parent.context.trace_context + 1 ) in + set_trace_context extra_context ; let span_id = Span_id.make () in let extra_context_with_depth = TraceContext.( @@ -405,7 +409,15 @@ module Span = struct let get_attributes span = Attributes.fold (fun k v tags -> (k, v) :: tags) span.attributes [] + let[@inline always] traceparent_of_parent parent = + parent |> get_context |> SpanContext.to_traceparent + let finish ?(attributes = Attributes.empty) ~span () = + (* Unfold the stack: set parent's traceparent if any. + If at top level then remove the trace context. + This ensures we don't have a stale trace context set. + *) + span.parent |> Option.map traceparent_of_parent |> Debug.set_remote_context ; let attributes = Attributes.union (fun _k a _b -> Some a) attributes span.attributes in diff --git a/ocaml/libs/vhd/vhd_format_lwt/odirect_stubs.c b/ocaml/libs/vhd/vhd_format_lwt/odirect_stubs.c index 7bc2044d3b4..4ef5359aaa3 100644 --- a/ocaml/libs/vhd/vhd_format_lwt/odirect_stubs.c +++ b/ocaml/libs/vhd/vhd_format_lwt/odirect_stubs.c @@ -36,7 +36,7 @@ CAMLprim value stub_openfile_direct(value filename, value rw, value perm){ const char *filename_c = strdup(String_val(filename)); - caml_release_runtime_system(); + int perm_c = Int_val(perm); int flags = 0; #if defined(O_DIRECT) flags |= O_DIRECT; @@ -46,7 +46,8 @@ CAMLprim value stub_openfile_direct(value filename, value rw, value perm){ } else { flags |= O_RDONLY; } - fd = open(filename_c, flags, Int_val(perm)); + caml_release_runtime_system(); + fd = open(filename_c, flags, perm_c); caml_acquire_runtime_system(); free((void*)filename_c); diff --git a/ocaml/libs/xapi-stdext/CHANGES.md b/ocaml/libs/xapi-stdext/CHANGES.md index 0973572d6da..63055a20e5b 100644 --- a/ocaml/libs/xapi-stdext/CHANGES.md +++ b/ocaml/libs/xapi-stdext/CHANGES.md @@ -132,7 +132,7 @@ * Namespace everything under Stdext. This is a backwards incompatible change. ## 0.13.0 (20-Nov-2014): -* Depend on Backtrace from xapi-backtrace +* Depend on Backtrace from backtrace * Add an opam file ## 0.12.0 (26-Sep-2014): diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/dune index 2a12545a2b9..12faadbcc7b 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/dune @@ -3,5 +3,5 @@ (public_name xapi-stdext-pervasives) (libraries logs - xapi-backtrace) + backtrace) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml index 8264c944b3d..30a47cc42a2 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml @@ -23,7 +23,7 @@ let finally fct clean_f = let result = try fct () with exn -> - Backtrace.is_important exn ; + let bt = Printexc.get_raw_backtrace () in ( try (* We catch and log exceptions raised by clean_f to avoid shadowing the original exception raised by fct *) @@ -36,7 +36,7 @@ let finally fct clean_f = (Printexc.to_string cleanup_exn) ) ) ; - raise exn + Printexc.raise_with_backtrace exn bt in clean_f () ; result diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.ml index 59bfc0240f8..10cfb258816 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.ml @@ -86,6 +86,20 @@ module List = struct in loop [] l + let try_map_collect f l = + let rec loop acc = function + | [] -> + Ok (List.rev acc) + | x :: xs -> ( + match f x with + | Ok r -> + loop (r :: acc) xs + | Error e -> + Error (List.rev acc, e) + ) + in + loop [] l + let take n list = let rec loop i acc = function | x :: xs when i < n -> diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.mli index b58b8d84016..c3ff7436d18 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-std/listext.mli @@ -40,9 +40,15 @@ module List : sig (** [last l] returns the last element of [l] or None if [l] is empty *) val try_map : ('a -> ('b, 'c) result) -> 'a list -> ('b list, 'c) result - (** [try_map f l] applies [f] to all elements of [l] in turn. Returns the - first [Error] result encountered or, if no errors were produced, returns - all the [Ok] results. *) + (** [try_map f l] applies [f] to elements of [l] in turn. Returns the first + [Error] result encountered or, if no errors were produced, returns all + the [Ok] results. *) + + val try_map_collect : + ('a -> ('b, 'c) result) -> 'a list -> ('b list, 'b list * 'c) result + (** [try_map_collect f l] applies [f] to elements of [l] in turn. Returns all + the [Ok] results, and the first [Error] result encountered, if it is + encountered. *) val rev_mapi : (int -> 'a -> 'b) -> 'a list -> 'b list (** [rev_map f l] gives the same result as {!Stdlib.List.rev}[ (] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/blkgetsize_stubs.c b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/blkgetsize_stubs.c index 0324f3dfb3f..b17732692cd 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/blkgetsize_stubs.c +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/blkgetsize_stubs.c @@ -23,13 +23,6 @@ #include #include -#include -#include -#include -#include -#include -#include - #include "blkgetsize.h" #ifdef __linux__ #include diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/dune index e73e4d47fa3..9116f7955ac 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/dune @@ -6,7 +6,7 @@ integers polly unix - xapi-backtrace + backtrace threads.posix unix-errno unix-errno.unix diff --git a/ocaml/libs/xenctrl-ext/xenctrlext_stubs.c b/ocaml/libs/xenctrl-ext/xenctrlext_stubs.c index 2577239b561..2467aa17bb9 100644 --- a/ocaml/libs/xenctrl-ext/xenctrlext_stubs.c +++ b/ocaml/libs/xenctrl-ext/xenctrlext_stubs.c @@ -35,9 +35,9 @@ static inline xc_interface *xch_of_val(value v) { - xc_interface *xch = *(xc_interface **)Data_custom_val(v); + xc_interface *xch = *(xc_interface **) Data_custom_val(v); - return xch; + return xch; } /* From xenctrl_stubs */ @@ -54,27 +54,28 @@ static inline xc_interface *xch_of_val(value v) static void stub_xenctrlext_finalize(value v) { - xc_interface_close(xch_of_val(v)); + xc_interface_close(xch_of_val(v)); } static struct custom_operations xenctrlext_ops = { - .identifier = "xapi-project.xenctrlext", - .finalize = stub_xenctrlext_finalize, - .compare = custom_compare_default, /* Can't compare */ - .hash = custom_hash_default, /* Can't hash */ - .serialize = custom_serialize_default, /* Can't serialize */ - .deserialize = custom_deserialize_default, /* Can't deserialize */ - .compare_ext = custom_compare_ext_default, /* Can't compare */ + .identifier = "xapi-project.xenctrlext", + .finalize = stub_xenctrlext_finalize, + .compare = custom_compare_default, /* Can't compare */ + .hash = custom_hash_default, /* Can't hash */ + .serialize = custom_serialize_default, /* Can't serialize */ + .deserialize = custom_deserialize_default, /* Can't deserialize */ + .compare_ext = custom_compare_ext_default, /* Can't compare */ }; static void raise_unix_errno_msg(int err_code, const char *err_msg) { - CAMLparam0(); - value args[] = { unix_error_of_code(err_code), caml_copy_string(err_msg) }; + CAMLparam0(); + value args[] = + { unix_error_of_code(err_code), caml_copy_string(err_msg) }; - caml_raise_with_args(*caml_named_value("Xenctrlext.Unix_error"), - sizeof(args)/sizeof(args[0]), args); - CAMLnoreturn; + caml_raise_with_args(*caml_named_value("Xenctrlext.Unix_error"), + sizeof(args) / sizeof(args[0]), args); + CAMLnoreturn; } static void failwith_xc(xc_interface *xch) @@ -82,178 +83,210 @@ static void failwith_xc(xc_interface *xch) static char error_str[XC_MAX_ERROR_MSG_LEN + 6]; int real_errno = errno; if (xch) { - snprintf(error_str, sizeof(error_str), "%d: %s", errno, strerror(errno)); + snprintf(error_str, sizeof(error_str), "%d: %s", errno, + strerror(errno)); } else { - snprintf(error_str, sizeof(error_str), "Unable to open XC interface"); + snprintf(error_str, sizeof(error_str), + "Unable to open XC interface"); } raise_unix_errno_msg(real_errno, error_str); } CAMLprim value stub_xenctrlext_interface_open(value unused) { - CAMLparam1(unused); - CAMLlocal1(result); - xc_interface *xch; + CAMLparam1(unused); + CAMLlocal1(result); + xc_interface *xch; - caml_release_runtime_system(); - xch = xc_interface_open(NULL, NULL, 0); - caml_acquire_runtime_system(); + caml_release_runtime_system(); + xch = xc_interface_open(NULL, NULL, 0); + caml_acquire_runtime_system(); - if ( !xch ) - failwith_xc(xch); + if (!xch) + failwith_xc(xch); - result = caml_alloc_custom(&xenctrlext_ops, sizeof(xch), 0, 1); - *(xc_interface **)Data_custom_val(result) = xch; + result = caml_alloc_custom(&xenctrlext_ops, sizeof(xch), 0, 1); + *(xc_interface **) Data_custom_val(result) = xch; - CAMLreturn(result); + CAMLreturn(result); } -CAMLprim value stub_xenctrlext_get_runstate_info(value xch_val, value domid) +CAMLprim value stub_xenctrlext_get_runstate_info(value xch_val, + value domid) { - CAMLparam2(xch_val, domid); + CAMLparam2(xch_val, domid); #if defined(XENCTRL_HAS_GET_RUNSTATE_INFO) - CAMLlocal1(result); - xc_runstate_info_t info; - int retval; - xc_interface *xch = xch_of_val(xch_val); - - retval = xc_get_runstate_info(xch, Int_val(domid), &info); - if (retval < 0) - failwith_xc(xch); - - /* Store - 0 : state (int32) - 1 : missed_changes (int32) - 2 : state_entry_time (int64) - 3-8 : times (int64s) - */ - result = caml_alloc_tuple(9); - Store_field(result, 0, caml_copy_int32(info.state)); - Store_field(result, 1, caml_copy_int32(info.missed_changes)); - Store_field(result, 2, caml_copy_int64(info.state_entry_time)); - Store_field(result, 3, caml_copy_int64(info.time[0])); - Store_field(result, 4, caml_copy_int64(info.time[1])); - Store_field(result, 5, caml_copy_int64(info.time[2])); - Store_field(result, 6, caml_copy_int64(info.time[3])); - Store_field(result, 7, caml_copy_int64(info.time[4])); - Store_field(result, 8, caml_copy_int64(info.time[5])); - - CAMLreturn(result); + CAMLlocal1(result); + xc_runstate_info_t info; + int retval; + xc_interface *xch = xch_of_val(xch_val); + int domain = Int_val(domid); + + caml_release_runtime_system(); + retval = xc_get_runstate_info(xch, domain, &info); + caml_acquire_runtime_system(); + if (retval < 0) + failwith_xc(xch); + + /* Store + 0 : state (int32) + 1 : missed_changes (int32) + 2 : state_entry_time (int64) + 3-8 : times (int64s) + */ + result = caml_alloc_tuple(9); + Store_field(result, 0, caml_copy_int32(info.state)); + Store_field(result, 1, caml_copy_int32(info.missed_changes)); + Store_field(result, 2, caml_copy_int64(info.state_entry_time)); + Store_field(result, 3, caml_copy_int64(info.time[0])); + Store_field(result, 4, caml_copy_int64(info.time[1])); + Store_field(result, 5, caml_copy_int64(info.time[2])); + Store_field(result, 6, caml_copy_int64(info.time[3])); + Store_field(result, 7, caml_copy_int64(info.time[4])); + Store_field(result, 8, caml_copy_int64(info.time[5])); + + CAMLreturn(result); #else - caml_failwith("XENCTRL_HAS_GET_RUNSTATE_INFO not defined"); + caml_failwith("XENCTRL_HAS_GET_RUNSTATE_INFO not defined"); #endif } -static int xcext_domain_send_s3resume(xc_interface *xch, unsigned int domid) +static int xcext_domain_send_s3resume(xc_interface *xch, + unsigned int domid) { - return xc_set_hvm_param(xch, domid, HVM_PARAM_ACPI_S_STATE, 0); + return xc_set_hvm_param(xch, domid, HVM_PARAM_ACPI_S_STATE, 0); } -static int xcext_domain_set_timer_mode(xc_interface *xch, unsigned int domid, int mode) +static int xcext_domain_set_timer_mode(xc_interface *xch, + unsigned int domid, int mode) { - return xc_set_hvm_param(xch, domid, + return xc_set_hvm_param(xch, domid, HVM_PARAM_TIMER_MODE, (unsigned long) mode); } -CAMLprim value stub_xenctrlext_domain_get_acpi_s_state(value xch_val, value domid) +CAMLprim value stub_xenctrlext_domain_get_acpi_s_state(value xch_val, + value domid) { - CAMLparam2(xch_val, domid); - unsigned long v; - int ret; - xc_interface* xch = xch_of_val(xch_val); + CAMLparam2(xch_val, domid); + unsigned long v; + int ret; + xc_interface *xch = xch_of_val(xch_val); + int domain = Int_val(domid); - ret = xc_get_hvm_param(xch, Int_val(domid), HVM_PARAM_ACPI_S_STATE, &v); - if (ret != 0) - failwith_xc(xch); + ret = xc_get_hvm_param(xch, domain, HVM_PARAM_ACPI_S_STATE, &v); + if (ret != 0) + failwith_xc(xch); - CAMLreturn(Val_int(v)); + CAMLreturn(Val_int(v)); } -CAMLprim value stub_xenctrlext_domain_send_s3resume(value xch_val, value domid) +CAMLprim value stub_xenctrlext_domain_send_s3resume(value xch_val, + value domid) { - CAMLparam2(xch_val, domid); - xc_interface *xch = xch_of_val(xch_val); + CAMLparam2(xch_val, domid); + xc_interface *xch = xch_of_val(xch_val); + int domain = Int_val(domid); - xcext_domain_send_s3resume(xch, Int_val(domid)); - CAMLreturn(Val_unit); + caml_release_runtime_system(); + xcext_domain_send_s3resume(xch, domain); + caml_acquire_runtime_system(); + CAMLreturn(Val_unit); } -CAMLprim value stub_xenctrlext_domain_set_timer_mode(value xch_val, value id, value mode) +CAMLprim value stub_xenctrlext_domain_set_timer_mode(value xch_val, + value id, value mode) { - CAMLparam3(xch_val, id, mode); - int ret; - xc_interface* xch = xch_of_val(xch_val); + CAMLparam3(xch_val, id, mode); + int ret; + xc_interface *xch = xch_of_val(xch_val); + int id_c = Int_val(id); + int mode_c = Int_val(mode); - ret = xcext_domain_set_timer_mode(xch, Int_val(id), Int_val(mode)); - if (ret < 0) - failwith_xc(xch); - CAMLreturn(Val_unit); + caml_release_runtime_system(); + ret = xcext_domain_set_timer_mode(xch, id_c, mode_c); + caml_acquire_runtime_system(); + + if (ret < 0) + failwith_xc(xch); + CAMLreturn(Val_unit); } CAMLprim value stub_xenctrlext_get_max_nr_cpus(value xch_val) { - CAMLparam1(xch_val); - xc_physinfo_t c_physinfo; + CAMLparam1(xch_val); + xc_physinfo_t c_physinfo; xc_interface *xch = xch_of_val(xch_val); - int r; + int r; - caml_release_runtime_system(); - r = xc_physinfo(xch, &c_physinfo); - caml_acquire_runtime_system(); + caml_release_runtime_system(); + r = xc_physinfo(xch, &c_physinfo); + caml_acquire_runtime_system(); - if (r) - failwith_xc(xch); + if (r) + failwith_xc(xch); - CAMLreturn(Val_int(c_physinfo.max_cpu_id + 1)); + CAMLreturn(Val_int(c_physinfo.max_cpu_id + 1)); } CAMLprim value stub_xenctrlext_domain_set_target(value xch_val, - value domid, - value target) + value domid, value target) { - CAMLparam3(xch_val, domid, target); - xc_interface* xch = xch_of_val(xch_val); + CAMLparam3(xch_val, domid, target); + xc_interface *xch = xch_of_val(xch_val); + int domain = Int_val(domid); + int target_c = Int_val(target); - int retval = xc_domain_set_target(xch, Int_val(domid), Int_val(target)); - if (retval) - failwith_xc(xch); - CAMLreturn(Val_unit); + caml_release_runtime_system(); + int retval = xc_domain_set_target(xch, domain, target_c); + caml_acquire_runtime_system(); + if (retval) + failwith_xc(xch); + CAMLreturn(Val_unit); } CAMLprim value stub_xenctrlext_physdev_map_pirq(value xch_val, - value domid, - value irq) + value domid, value irq) { CAMLparam3(xch_val, domid, irq); xc_interface *xch = xch_of_val(xch_val); int pirq = Int_val(irq); + int domain = Int_val(domid); caml_release_runtime_system(); - int retval = xc_physdev_map_pirq(xch, Int_val(domid), pirq, &pirq); + int retval = xc_physdev_map_pirq(xch, domain, pirq, &pirq); caml_acquire_runtime_system(); if (retval) failwith_xc(xch); CAMLreturn(Val_int(pirq)); -} /* ocaml here would be int -> int */ +} /* ocaml here would be int -> int */ CAMLprim value stub_xenctrlext_assign_device(value xch_val, value domid, - value machine_sbdf, value flag) + value machine_sbdf, + value flag) { CAMLparam4(xch_val, domid, machine_sbdf, flag); xc_interface *xch = xch_of_val(xch_val); + int domain = Int_val(domid); + int machine_sbdf_c = Int_val(machine_sbdf); + int flag_c = Int_val(flag); + caml_release_runtime_system(); - int retval = xc_assign_device(xch, Int_val(domid), Int_val(machine_sbdf), Int_val(flag)); + int retval = xc_assign_device(xch, domain, machine_sbdf_c, flag_c); caml_acquire_runtime_system(); if (retval) failwith_xc(xch); CAMLreturn(Val_unit); } -CAMLprim value stub_xenctrlext_deassign_device(value xch_val, value domid, value machine_sbdf) +CAMLprim value stub_xenctrlext_deassign_device(value xch_val, value domid, + value machine_sbdf) { CAMLparam3(xch_val, domid, machine_sbdf); xc_interface *xc = xch_of_val(xch_val); + int domain = Int_val(domid); + int machine_sbdf_c = Int_val(machine_sbdf); + caml_release_runtime_system(); - int retval = xc_deassign_device(xc, Int_val(domid), Int_val(machine_sbdf)); + int retval = xc_deassign_device(xc, domain, machine_sbdf_c); caml_acquire_runtime_system(); if (retval) failwith_xc(xc); @@ -266,27 +299,37 @@ CAMLprim value stub_xenctrlext_domid_quarantine(value unit) CAMLreturn(Val_int(DOMID_IO)); } -CAMLprim value stub_xenctrlext_domain_soft_reset(value xch_val, value domid) +CAMLprim value stub_xenctrlext_domain_soft_reset(value xch_val, + value domid) { CAMLparam2(xch_val, domid); xc_interface *xc = xch_of_val(xch_val); + int domain = Int_val(domid); caml_release_runtime_system(); - int retval = xc_domain_soft_reset(xc, Int_val(domid)); + int retval = xc_domain_soft_reset(xc, domain); caml_acquire_runtime_system(); if (retval) failwith_xc(xc); CAMLreturn(Val_unit); } -CAMLprim value stub_xenctrlext_domain_update_channels(value xch_val, value domid, - value store_port, value console_port) +CAMLprim value stub_xenctrlext_domain_update_channels(value xch_val, + value domid, + value store_port, + value console_port) { CAMLparam4(xch_val, domid, store_port, console_port); xc_interface *xc = xch_of_val(xch_val); + int domain = Int_val(domid); + int store_port_c = Int_val(store_port); + int console_port_c = Int_val(console_port); caml_release_runtime_system(); - int retval = xc_set_hvm_param(xc, Int_val(domid), HVM_PARAM_STORE_EVTCHN, Int_val(store_port)); + int retval = + xc_set_hvm_param(xc, domain, HVM_PARAM_STORE_EVTCHN, store_port_c); if (!retval) - retval = xc_set_hvm_param(xc, Int_val(domid), HVM_PARAM_CONSOLE_EVTCHN, Int_val(console_port)); + retval = + xc_set_hvm_param(xc, domain, HVM_PARAM_CONSOLE_EVTCHN, + console_port_c); caml_acquire_runtime_system(); if (retval) failwith_xc(xc); @@ -296,19 +339,19 @@ CAMLprim value stub_xenctrlext_domain_update_channels(value xch_val, value domid /* based on xenctrl_stubs.c */ static int get_cpumap_len(xc_interface *xch, value cpumap_val) { - int ml_len = Wosize_val(cpumap_val); - int xc_len = xc_get_max_cpus(xch); + int ml_len = Wosize_val(cpumap_val); + int xc_len = xc_get_max_cpus(xch); - return (ml_len < xc_len ? ml_len : xc_len); + return (ml_len < xc_len ? ml_len : xc_len); } static void populate_cpumap(xc_interface *xch, xc_cpumap_t cpumap, value cpumap_val) { - int i, len = get_cpumap_len(xch, cpumap_val); - for (i=0; i int64 array (p2) -> int64 array (new) */ CAMLprim value stub_xenctrlext_combine_cpu_featuresets(value p1, value p2) { - CAMLparam2(p1, p2); - CAMLlocal1(result); + CAMLparam2(p1, p2); + CAMLlocal1(result); - mlsize_t p1_len = caml_array_length(p1); - mlsize_t p2_len = caml_array_length(p2); - mlsize_t len = MAX(p1_len, p2_len); - mlsize_t i; + mlsize_t p1_len = caml_array_length(p1); + mlsize_t p2_len = caml_array_length(p2); + mlsize_t len = MAX(p1_len, p2_len); + mlsize_t i; - uint32_t c_p1[len], c_p2[len], c_out[len]; + uint32_t c_p1[len], c_p2[len], c_out[len]; - if (!xc_combine_cpu_featuresets) - raise_unix_errno_msg(ENOSYS, "xc_combine_cpu_featuresets"); + if (!xc_combine_cpu_featuresets) + raise_unix_errno_msg(ENOSYS, "xc_combine_cpu_featuresets"); - if (len == 0) - CAMLreturn(Atom(0)); + if (len == 0) + CAMLreturn(Atom(0)); - ocaml_int64_array_to_c_array(p1, c_p1, len); - ocaml_int64_array_to_c_array(p2, c_p2, len); + ocaml_int64_array_to_c_array(p1, c_p1, len); + ocaml_int64_array_to_c_array(p2, c_p2, len); - xc_combine_cpu_featuresets(c_p1, c_p2, c_out, len); + xc_combine_cpu_featuresets(c_p1, c_p2, c_out, len); - /* Turn c_out back into an Ocaml int64 array. */ - result = caml_alloc(len, 0); - for ( i = 0; i < len; ++i ) - Store_field(result, i, caml_copy_int64(c_out[i])); + /* Turn c_out back into an Ocaml int64 array. */ + result = caml_alloc(len, 0); + for (i = 0; i < len; ++i) + Store_field(result, i, caml_copy_int64(c_out[i])); - CAMLreturn(result); + CAMLreturn(result); } __attribute__((weak)) -const char *xc_cpu_featuresets_are_compatible( - const uint32_t *vm, const uint32_t *host, size_t len, char err[128]); +const char *xc_cpu_featuresets_are_compatible(const uint32_t *vm, + const uint32_t *host, + size_t len, char err[128]); /* int64 array (vm) -> int64 array (host) -> string option (None on success, string on failure) */ -CAMLprim value stub_xenctrlext_featuresets_are_compatible(value vm, value host) +CAMLprim value stub_xenctrlext_featuresets_are_compatible(value vm, + value host) { - CAMLparam2(vm, host); - CAMLlocal1(result); + CAMLparam2(vm, host); + CAMLlocal1(result); - mlsize_t vm_len = caml_array_length(vm); - mlsize_t host_len = caml_array_length(host); - mlsize_t len = MAX(vm_len, host_len); + mlsize_t vm_len = caml_array_length(vm); + mlsize_t host_len = caml_array_length(host); + mlsize_t len = MAX(vm_len, host_len); - uint32_t c_vm[len], c_host[len]; - char msg[128]; - const char *err; + uint32_t c_vm[len], c_host[len]; + char msg[128]; + const char *err; - if (!xc_cpu_featuresets_are_compatible) - raise_unix_errno_msg(ENOSYS, "xc_cpu_featuresets_are_compatible"); + if (!xc_cpu_featuresets_are_compatible) + raise_unix_errno_msg(ENOSYS, "xc_cpu_featuresets_are_compatible"); - ocaml_int64_array_to_c_array(vm, c_vm, len); - ocaml_int64_array_to_c_array(host, c_host, len); + ocaml_int64_array_to_c_array(vm, c_vm, len); + ocaml_int64_array_to_c_array(host, c_host, len); - err = xc_cpu_featuresets_are_compatible(c_vm, c_host, len, msg); + err = xc_cpu_featuresets_are_compatible(c_vm, c_host, len, msg); - if (!err) - result = Val_none; - else { - result = caml_alloc_small(1, Tag_some); - Store_field(result, 0, caml_copy_string(err)); - } + if (!err) + result = Val_none; + else { + result = caml_alloc_small(1, Tag_some); + Store_field(result, 0, caml_copy_string(err)); + } - CAMLreturn(result); + CAMLreturn(result); } CAMLprim value stub_xenforeignmemory_open(value unit) { - CAMLparam1(unit); - struct xenforeignmemory_handle *fmem; - CAMLlocal1(result); + CAMLparam1(unit); + struct xenforeignmemory_handle *fmem; + CAMLlocal1(result); - // allocate memory to store the result, if the call to get the xfm - // handle fails the ocaml GC will collect this abstract tag - result = caml_alloc(1, Abstract_tag); + // allocate memory to store the result, if the call to get the xfm + // handle fails the ocaml GC will collect this abstract tag + result = caml_alloc(1, Abstract_tag); - // use NULL instead of a xentoollog handle as those bindings are flawed - fmem = xenforeignmemory_open(NULL, 0); + // use NULL instead of a xentoollog handle as those bindings are flawed + fmem = xenforeignmemory_open(NULL, 0); - if(fmem == NULL) { - caml_failwith("Error when opening foreign memory handle"); - } + if (fmem == NULL) { + caml_failwith("Error when opening foreign memory handle"); + } - Xfm_val(result) = fmem; + Xfm_val(result) = fmem; - CAMLreturn(result); + CAMLreturn(result); } CAMLprim value stub_xenforeignmemory_close(value fmem) { - CAMLparam1(fmem); - int retval; - - if(Xfm_val(fmem) == NULL) { - caml_invalid_argument( - "Error: cannot close NULL foreign memory handle"); - } + CAMLparam1(fmem); + int retval; - retval = xenforeignmemory_close(Xfm_val(fmem)); + if (Xfm_val(fmem) == NULL) { + caml_invalid_argument + ("Error: cannot close NULL foreign memory handle"); + } - if(retval < 0) { - caml_failwith("Error when closing foreign memory handle"); - } + retval = xenforeignmemory_close(Xfm_val(fmem)); - // Protect against double close - Xfm_val(fmem) = NULL; + if (retval < 0) { + caml_failwith("Error when closing foreign memory handle"); + } + // Protect against double close + Xfm_val(fmem) = NULL; - CAMLreturn(Val_unit); + CAMLreturn(Val_unit); } CAMLprim value stub_xenforeignmemory_map(value fmem, value dom, - value prot_flags, value pages) -{ - CAMLparam4(fmem, dom, prot_flags, pages); - CAMLlocal2(cell, result); - size_t i, pages_length; - xen_pfn_t *arr; - int prot, the_errno; - void *retval; - xenforeignmemory_handle *handle = Xfm_val(fmem); - - if (Field(prot_flags, 0) == Val_false && - Field(prot_flags, 1) == Val_false && - Field(prot_flags, 2) == Val_false) { - prot = PROT_NONE; - } else { - prot = 0; - if(Field(prot_flags, 0) == Val_true) { - prot |= PROT_READ; - } - if(Field(prot_flags, 1) == Val_true) { - prot |= PROT_WRITE; - } - if(Field(prot_flags, 2) == Val_true) { - prot |= PROT_EXEC; - } + value prot_flags, value pages) +{ + CAMLparam4(fmem, dom, prot_flags, pages); + CAMLlocal2(cell, result); + size_t i, pages_length; + xen_pfn_t *arr; + int prot, the_errno; + void *retval; + xenforeignmemory_handle *handle = Xfm_val(fmem); + + if (Field(prot_flags, 0) == Val_false && + Field(prot_flags, 1) == Val_false && + Field(prot_flags, 2) == Val_false) { + prot = PROT_NONE; + } else { + prot = 0; + if (Field(prot_flags, 0) == Val_true) { + prot |= PROT_READ; } - - // traverse list to know the length of the array - cell = pages; - for(pages_length = 0; cell != Val_emptylist; pages_length++) { - cell = Field(cell, 1); + if (Field(prot_flags, 1) == Val_true) { + prot |= PROT_WRITE; } - - // allocate and populate the array - arr = malloc(sizeof(xen_pfn_t) * pages_length); - if(arr == NULL) { - caml_failwith("Error: could not allocate page array before mapping memory"); + if (Field(prot_flags, 2) == Val_true) { + prot |= PROT_EXEC; } + } - cell = pages; - for(i = 0; i < pages_length; i++) { - arr[i] = Int64_val(Field(cell, 0)); - cell = Field(cell, 1); - } + // traverse list to know the length of the array + cell = pages; + for (pages_length = 0; cell != Val_emptylist; pages_length++) { + cell = Field(cell, 1); + } - retval = xenforeignmemory_map - (handle, Int_val(dom), prot, pages_length, arr, NULL); - the_errno = errno; + // allocate and populate the array + arr = malloc(sizeof(xen_pfn_t) * pages_length); + if (arr == NULL) { + caml_failwith + ("Error: could not allocate page array before mapping memory"); + } - free(arr); + cell = pages; + for (i = 0; i < pages_length; i++) { + arr[i] = Int64_val(Field(cell, 0)); + cell = Field(cell, 1); + } - if(retval == NULL) { - raise_unix_errno_msg(the_errno, - "Error when trying to map foreign memory"); - } + int domain = Int_val(dom); + retval = xenforeignmemory_map + (handle, domain, prot, pages_length, arr, NULL); + the_errno = errno; - result = caml_ba_alloc_dims( - CAML_BA_CHAR | CAML_BA_C_LAYOUT | CAML_BA_EXTERNAL, 1, - retval, (long) 4096 * pages_length); + free(arr); - CAMLreturn(result); + if (retval == NULL) { + raise_unix_errno_msg(the_errno, + "Error when trying to map foreign memory"); + } + + result = + caml_ba_alloc_dims(CAML_BA_CHAR | CAML_BA_C_LAYOUT | + CAML_BA_EXTERNAL, 1, retval, + (long) 4096 * pages_length); + + CAMLreturn(result); } CAMLprim value stub_xenforeignmemory_unmap(value fmem, value mapping) { - CAMLparam2(fmem, mapping); - size_t pages; - int retval, the_errno; + CAMLparam2(fmem, mapping); + size_t pages; + int retval, the_errno; - // convert mapping to pages and addr - pages = Caml_ba_array_val(mapping)->dim[0] / 4096; + // convert mapping to pages and addr + pages = Caml_ba_array_val(mapping)->dim[0] / 4096; - retval = xenforeignmemory_unmap(Xfm_val(fmem), - Caml_ba_data_val(mapping), pages); - the_errno = errno; + retval = xenforeignmemory_unmap(Xfm_val(fmem), + Caml_ba_data_val(mapping), pages); + the_errno = errno; - if(retval < 0) { - raise_unix_errno_msg(the_errno, - "Error when trying to unmap foreign memory"); - } + if (retval < 0) { + raise_unix_errno_msg(the_errno, + "Error when trying to unmap foreign memory"); + } - CAMLreturn(Val_unit); + CAMLreturn(Val_unit); } -CAMLprim value stub_xenctrlext_domain_claim_pages(value xch_val, value domid_val, - value numa_node_val, value nr_pages_val) +CAMLprim value stub_xenctrlext_domain_claim_pages(value xch_val, + value domid_val, + value numa_node_val, + value nr_pages_val) { - CAMLparam4(xch_val, domid_val, numa_node_val, nr_pages_val); + CAMLparam4(xch_val, domid_val, numa_node_val, nr_pages_val); #ifdef XEN_DOMCTL_NUMA_OP_GET_NODE_PAGES - int retval, the_errno; - xc_interface* xch = xch_of_val(xch_val); - uint32_t domid = Int_val(domid_val); - unsigned int numa_node = Int_val(numa_node_val); - unsigned long nr_pages = Long_val(nr_pages_val); + int retval, the_errno; + xc_interface *xch = xch_of_val(xch_val); + uint32_t domid = Int_val(domid_val); + unsigned int numa_node = Int_val(numa_node_val); + unsigned long nr_pages = Long_val(nr_pages_val); - caml_release_runtime_system(); - retval = xc_domain_claim_pages_node(xch, domid, numa_node, nr_pages); - the_errno = errno; - caml_acquire_runtime_system(); + caml_release_runtime_system(); + retval = xc_domain_claim_pages_node(xch, domid, numa_node, nr_pages); + the_errno = errno; + caml_acquire_runtime_system(); - if(retval < 0) { - raise_unix_errno_msg(the_errno, - "Error when trying to claim memory pages"); - } + if (retval < 0) { + raise_unix_errno_msg(the_errno, + "Error when trying to claim memory pages"); + } #else - raise_unix_errno_msg(ENOSYS, "xc_domain_claim_pages_node"); + raise_unix_errno_msg(ENOSYS, "xc_domain_claim_pages_node"); #endif - CAMLreturn(Val_unit); + CAMLreturn(Val_unit); } #ifdef XEN_DOMCTL_NUMA_OP_GET_NODE_PAGES -CAMLprim value stub_xc_domain_numa_get_node_pages(value xch_val, value domid); +CAMLprim value stub_xc_domain_numa_get_node_pages(value xch_val, + value domid); #endif -CAMLprim value stub_xc_domain_numa_get_node_pages_wrapper(value xch_val, value domid) +CAMLprim value stub_xc_domain_numa_get_node_pages_wrapper(value xch_val, + value domid) { #ifdef XEN_DOMCTL_NUMA_OP_GET_NODE_PAGES - return stub_xc_domain_numa_get_node_pages(xch_val, domid); + return stub_xc_domain_numa_get_node_pages(xch_val, domid); #else - CAMLparam2(xch_val, domid); - caml_failwith(__func__); - CAMLnoreturn; + CAMLparam2(xch_val, domid); + caml_failwith(__func__); + CAMLnoreturn; #endif } @@ -723,94 +775,94 @@ CAMLprim value stub_xc_domain_numa_get_node_pages_wrapper(value xch_val, value d */ CAMLprim value stub_xenctrlext_numa_meminfo(value xch_val) { - CAMLparam1(xch_val); - CAMLlocal2(result, info); - xc_interface *xch = xch_of_val(xch_val); - unsigned int max_nodes = 0; - unsigned int i; - int ret; + CAMLparam1(xch_val); + CAMLlocal2(result, info); + xc_interface *xch = xch_of_val(xch_val); + unsigned int max_nodes = 0; + unsigned int i; + int ret; #ifdef XEN_SYSCTL_numa_meminfo - xen_sysctl_node_meminfo_t *meminfo = NULL; - - /* First call to get node count */ - caml_release_runtime_system(); - ret = xc_numa_meminfo(xch, &max_nodes, NULL); - caml_acquire_runtime_system(); - - if (ret == 0) { - /* New hypercall available, use it */ - meminfo = calloc(max_nodes, sizeof(*meminfo)); - if (!meminfo) - caml_raise_out_of_memory(); - - caml_release_runtime_system(); - ret = xc_numa_meminfo(xch, &max_nodes, meminfo); - caml_acquire_runtime_system(); - - if (ret < 0) { - int err = errno; - free(meminfo); - errno = err; - failwith_xc(xch); - } - - result = caml_alloc_tuple(max_nodes); - for (i = 0; i < max_nodes; i++) { - info = caml_alloc_tuple(3); - Store_field(info, 0, caml_copy_int64(meminfo[i].size)); - Store_field(info, 1, caml_copy_int64(meminfo[i].free)); - Store_field(info, 2, caml_copy_int64(meminfo[i].claimed)); - Store_field(result, i, info); - } - - free(meminfo); - CAMLreturn(result); - } - - /* If we get ENOSYS or EOPNOTSUPP, fall back to old hypercall */ - if (errno != ENOSYS && errno != EOPNOTSUPP) - failwith_xc(xch); + xen_sysctl_node_meminfo_t *meminfo = NULL; + + /* First call to get node count */ + caml_release_runtime_system(); + ret = xc_numa_meminfo(xch, &max_nodes, NULL); + caml_acquire_runtime_system(); + + if (ret == 0) { + /* New hypercall available, use it */ + meminfo = calloc(max_nodes, sizeof(*meminfo)); + if (!meminfo) + caml_raise_out_of_memory(); + + caml_release_runtime_system(); + ret = xc_numa_meminfo(xch, &max_nodes, meminfo); + caml_acquire_runtime_system(); + + if (ret < 0) { + int err = errno; + free(meminfo); + errno = err; + failwith_xc(xch); + } + + result = caml_alloc_tuple(max_nodes); + for (i = 0; i < max_nodes; i++) { + info = caml_alloc_tuple(3); + Store_field(info, 0, caml_copy_int64(meminfo[i].size)); + Store_field(info, 1, caml_copy_int64(meminfo[i].free)); + Store_field(info, 2, caml_copy_int64(meminfo[i].claimed)); + Store_field(result, i, info); + } + + free(meminfo); + CAMLreturn(result); + } + + /* If we get ENOSYS or EOPNOTSUPP, fall back to old hypercall */ + if (errno != ENOSYS && errno != EOPNOTSUPP) + failwith_xc(xch); #endif - /* Fallback: use xc_numainfo with claimed=0 */ - { - xc_meminfo_t *old_meminfo = NULL; - - caml_release_runtime_system(); - ret = xc_numainfo(xch, &max_nodes, NULL, NULL); - caml_acquire_runtime_system(); - - if (ret < 0) - failwith_xc(xch); - - old_meminfo = calloc(max_nodes, sizeof(*old_meminfo)); - if (!old_meminfo) - caml_raise_out_of_memory(); - - caml_release_runtime_system(); - ret = xc_numainfo(xch, &max_nodes, old_meminfo, NULL); - caml_acquire_runtime_system(); - - if (ret < 0) { - int err = errno; - free(old_meminfo); - errno = err; - failwith_xc(xch); - } - - result = caml_alloc_tuple(max_nodes); - for (i = 0; i < max_nodes; i++) { - info = caml_alloc_tuple(3); - Store_field(info, 0, caml_copy_int64(old_meminfo[i].memsize)); - Store_field(info, 1, caml_copy_int64(old_meminfo[i].memfree)); - Store_field(info, 2, caml_copy_int64(0)); /* claimed=0 */ - Store_field(result, i, info); - } - - free(old_meminfo); - CAMLreturn(result); - } + /* Fallback: use xc_numainfo with claimed=0 */ + { + xc_meminfo_t *old_meminfo = NULL; + + caml_release_runtime_system(); + ret = xc_numainfo(xch, &max_nodes, NULL, NULL); + caml_acquire_runtime_system(); + + if (ret < 0) + failwith_xc(xch); + + old_meminfo = calloc(max_nodes, sizeof(*old_meminfo)); + if (!old_meminfo) + caml_raise_out_of_memory(); + + caml_release_runtime_system(); + ret = xc_numainfo(xch, &max_nodes, old_meminfo, NULL); + caml_acquire_runtime_system(); + + if (ret < 0) { + int err = errno; + free(old_meminfo); + errno = err; + failwith_xc(xch); + } + + result = caml_alloc_tuple(max_nodes); + for (i = 0; i < max_nodes; i++) { + info = caml_alloc_tuple(3); + Store_field(info, 0, caml_copy_int64(old_meminfo[i].memsize)); + Store_field(info, 1, caml_copy_int64(old_meminfo[i].memfree)); + Store_field(info, 2, caml_copy_int64(0)); /* claimed=0 */ + Store_field(result, i, info); + } + + free(old_meminfo); + CAMLreturn(result); + } } diff --git a/ocaml/quicktest/dune b/ocaml/quicktest/dune index 9183649b664..cfffb07f00d 100644 --- a/ocaml/quicktest/dune +++ b/ocaml/quicktest/dune @@ -9,10 +9,15 @@ ezxenstore ezxenstore.watch fmt + fmt.tty forkexec http_lib mtime mtime.clock.os + cli_progress_bar + quicktest_trace + quicktest_trace_api + quicktest_trace_rpc pam qcheck-alcotest result @@ -25,6 +30,7 @@ threads.posix unix uuid + backtrace xapi-client xapi-consts xapi-datamodel diff --git a/ocaml/quicktest/qt.ml b/ocaml/quicktest/qt.ml index 5cb8b342f81..0414a9136ae 100644 --- a/ocaml/quicktest/qt.ml +++ b/ocaml/quicktest/qt.ml @@ -5,6 +5,7 @@ type sr_info = { ; allowed_operations: API.storage_operations_set ; capabilities: string list ; required_sm_api_version: string + ; is_iso: bool } let init_session rpc username password = @@ -152,16 +153,29 @@ module VM = struct cmd @ Option.fold ~none:[] ~some:(fun x -> ["sr-uuid=" ^ x]) sr_uuid in let newvm_uuid = cli_cmd cmd in - Client.Client.VM.get_by_uuid ~rpc ~session_id ~uuid:newvm_uuid + (newvm_uuid, Client.Client.VM.get_by_uuid ~rpc ~session_id ~uuid:newvm_uuid) let uninstall rpc session_id vm = let uuid = Client.Client.VM.get_uuid ~rpc ~session_id ~self:vm in cli_cmd ["vm-uninstall"; "uuid=" ^ uuid; "--force"] |> ignore - let with_new rpc session_id ~template ?sr f = - let vm = + let with_new rpc session_id ~template ?iso ?sr f = + let uuid, vm = install rpc session_id ~template ~name:"temp_quicktest_vm" ?sr () in + iso + |> Option.iter (fun iso -> + let (_ : string) = + cli_cmd + [ + "vm-cd-add" + ; "uuid=" ^ uuid + ; "cd-name=" ^ iso.API.vDI_name_label + ; "device=0" + ] + in + () + ) ; Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> f vm) (fun () -> uninstall rpc session_id vm) diff --git a/ocaml/quicktest/qt.mli b/ocaml/quicktest/qt.mli index e0f2bb8acb2..aaa5e211732 100644 --- a/ocaml/quicktest/qt.mli +++ b/ocaml/quicktest/qt.mli @@ -9,6 +9,7 @@ type sr_info = { ; allowed_operations: API.storage_operations_set ; capabilities: string list ; required_sm_api_version: string + ; is_iso: bool } val init_session : rpc -> string -> string -> API.ref_session @@ -53,6 +54,7 @@ module VM : sig rpc -> API.ref_session -> template:API.ref_VM + -> ?iso:API.vDI_t -> ?sr:API.ref_SR -> (API.ref_VM -> 'a) -> 'a diff --git a/ocaml/quicktest/qt_filter.ml b/ocaml/quicktest/qt_filter.ml index 7ad7e2e8307..413dff0d4b0 100644 --- a/ocaml/quicktest/qt_filter.ml +++ b/ocaml/quicktest/qt_filter.ml @@ -104,6 +104,9 @@ module SR = struct (* Even though the SM backend may expose a VDI_CREATE capability attempts to actually create a VDI will fail in (eg) the tools SR and any that happen to be R/O NFS exports *) + let is_iso_sr = + Client.Client.SR.get_content_type ~rpc ~session_id ~self:sr = "iso" + in let avoid_vdi_create session_id sr = let other_config = Client.Client.SR.get_other_config ~rpc ~session_id ~self:sr @@ -111,9 +114,6 @@ module SR = struct let is_tools_sr = Client.Client.SR.get_is_tools_sr ~rpc ~session_id ~self:sr in - let is_iso_sr = - Client.Client.SR.get_content_type ~rpc ~session_id ~self:sr = "iso" - in let special_key = "quicktest-no-VDI_CREATE" in let is_marked = List.mem_assoc special_key other_config @@ -155,13 +155,13 @@ module SR = struct else ops in - (ops, caps, sm.API.sM_required_api_version) + (ops, caps, sm.API.sM_required_api_version, is_iso_sr) in - let allowed_operations, capabilities, required_sm_api_version = + let allowed_operations, capabilities, required_sm_api_version, is_iso = get_sr_features session_id sr in let open Qt in - {sr; allowed_operations; capabilities; required_sm_api_version} + {sr; allowed_operations; capabilities; required_sm_api_version; is_iso} let list_srs_connected_to_localhost rpc session_id = let is_attached = @@ -234,6 +234,15 @@ module SR = struct let sr_filter f srs () = List.filter f (srs ()) + let iso_srs () = + with_xapi_query @@ fun () -> + Lazy.force all_srs + |> List.filter (fun sr_info -> + Client.Client.SR.get_content_type ~rpc:!A.rpc ~session_id:!session_id + ~self:sr_info.Qt.sr + = "iso" + ) + let not_iso = sr_filter (fun sr_info -> Client.Client.SR.get_content_type ~rpc:!A.rpc ~session_id:!session_id @@ -241,6 +250,13 @@ module SR = struct <> "iso" ) + let is_iso = + sr_filter (fun sr_info -> + Client.Client.SR.get_content_type ~rpc:!A.rpc ~session_id:!session_id + ~self:sr_info.Qt.sr + = "iso" + ) + let is_empty = function [] -> true | _ :: _ -> false let with_any_vdi = @@ -346,3 +362,33 @@ let vm_template template_name = | Some vm_template -> [(name, speed, test vm_template)] ) + +let find_memtest_iso ~prefix srs = + with_xapi_query @@ fun () -> + let isos = + srs + |> List.concat_map @@ fun iso_info -> + let expr = + Printf.sprintf {|field "SR" = "%s"|} (Ref.string_of iso_info.Qt.sr) + in + Client.Client.VDI.get_all_records_where ~rpc:!A.rpc + ~session_id:!session_id ~expr + |> List.filter (fun (_, iso) -> + String.starts_with ~prefix iso.API.vDI_name_label + ) + in + isos + |> List.sort (fun (_, a) (_, b) -> + -String.compare a.API.vDI_name_label b.API.vDI_name_label + ) + +let memtest_iso ?(prefix = "memtest") tcs = + let isos = find_memtest_iso ~prefix (SR.iso_srs ()) in + tcs + |> for_each @@ fun (name, speed, test) -> + match isos with + | [] -> + [] + | (_, iso) :: _ -> + Printf.eprintf "Choosing ISO %S\n%!" iso.API.vDI_name_label ; + [(name, speed, test iso)] diff --git a/ocaml/quicktest/qt_filter.mli b/ocaml/quicktest/qt_filter.mli index afd271839ef..2b0d4492941 100644 --- a/ocaml/quicktest/qt_filter.mli +++ b/ocaml/quicktest/qt_filter.mli @@ -49,8 +49,12 @@ module SR : sig val random : srs -> srs + val iso_srs : srs + val not_iso : srs -> srs + val is_iso : srs -> srs + val with_any_vdi : srs -> srs (** Selects SRs that either have a VDI or we can create & destroy a VDI on them. This filter should be called from tests using [VDI.with_any] *) @@ -86,3 +90,5 @@ end val sr : SR.srs -> (Qt.sr_info -> 'b, 'b) filter val vm_template : string -> (API.ref_VM -> 'b, 'b) filter + +val memtest_iso : ?prefix:string -> (API.vDI_t -> 'a, 'a) filter diff --git a/ocaml/quicktest/quicktest.ml b/ocaml/quicktest/quicktest.ml index 337b1ae2b3e..d996b571504 100644 --- a/ocaml/quicktest/quicktest.ml +++ b/ocaml/quicktest/quicktest.ml @@ -19,12 +19,45 @@ let qchecks = |> List.map @@ fun (name, test) -> (name, List.map QCheck_alcotest.(to_alcotest ~long:true) test) +let setup_tty () = + let style_renderer = + if !Quicktest_args.use_colour then + (* use default style, auto-detect color support *) + None + else + (* never use color *) + Some `None + in + Fmt_tty.setup_std_outputs ?style_renderer () + +let wrap f = + setup_tty () ; + let open Quicktest_trace in + Opentelemetry.Globals.service_name := "quicktest" ; + TeeBackend.with_default_setup () @@ fun () -> + Sys.catch_break true ; + () |> Debug.with_thread_associated "quicktest" @@ fun () -> Qt_filter.wrap f + let () = Quicktest_args.parse () ; - Qt_filter.wrap (fun () -> + wrap (fun () -> let suite = [ - ("Quicktest_example", Quicktest_example.tests ()) + ( "Quicktest_vm_calibrate_cleanup0" + , Quicktest_vm_calibrate.tests_cleanup () + ) + ; ("Quicktest_vm_calibrate", Quicktest_vm_calibrate.tests ()) + ; ( "Quicktest_vm_calibrate_cleanup1" + , Quicktest_vm_calibrate.tests_cleanup () + ) + ; ( "Quicktest_vm_calibrate_cleanup00" + , Quicktest_vm_calibrate.tests_cleanup () + ) + ; ("Quicktest_vm_memory", Quicktest_vm_memory.tests ()) + ; ( "Quicktest_vm_calibrate_cleanup2" + , Quicktest_vm_calibrate.tests_cleanup () + ) + ; ("Quicktest_example", Quicktest_example.tests ()) ; ("Quicktest_message", Quicktest_message.tests ()) ; ("xenstore", Quicktest_xenstore.tests ()) ; ("cbt", Quicktest_cbt.tests ()) diff --git a/ocaml/quicktest/quicktest_api_helpers.ml b/ocaml/quicktest/quicktest_api_helpers.ml new file mode 100644 index 00000000000..cff7ad2fd0b --- /dev/null +++ b/ocaml/quicktest/quicktest_api_helpers.ml @@ -0,0 +1,476 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Client.Client +open Quicktest_trace +open Quicktest_trace_api +open Quicktest_trace_rpc + +(** [points_between var_min var_max] generates a sequence of values between + [[var_min, var_max]], where [var_max] is not necessarily a power of 2. + The points inbetween are [var_min * 2**i] + *) +let points_between var_min var_max = + Seq.append + (Seq.unfold + (fun i -> + if i < var_max then + Some (i, Int64.shift_left i 1) + else + None + ) + var_min + ) + (Seq.return var_max) + +let localhost_free_pages scope = + Xenctrl.with_intf @@ fun xc -> + let host_info = Xenctrl.physinfo xc in + let pages = host_info.Xenctrl.free_pages |> Int64.of_nativeint in + Scope.add_metrics scope (fun () -> + Opentelemetry.Metrics.( + gauge ~name:"host_free_pages" [int (Int64.to_int pages)] + ) + ) ; + pages + +let rec stable_localhost_free_pages scope t ~host = + let rec loop scope delay = + let () = + Trace.with_ ~force_new_trace_id:true "wait_no_active_tasks" @@ fun _ -> + Api.wait_no_active_tasks t ~host + in + let v0 = localhost_free_pages scope in + if delay > Float.epsilon then + (* there may be some pending tasks in Xen, wait for them to finish *) + Thread.delay delay ; + let v1 = localhost_free_pages scope in + if v1 <> v0 then begin + let () = + Scope.add_event scope @@ fun () -> + Opentelemetry.Event.make + ~attrs: + [ + ("free_pages0", `Int (Int64.to_int v0)) + ; ("free_pages1", `Int (Int64.to_int v1)) + ; ("delay", `Float delay) + ] + "Free pages is not stable" + in + let delay = + if delay > Float.epsilon then + delay *. 2. + else + 0.005 + in + loop scope (delay *. 2.) + end else + v1 + in + Trace.with_ ~scope __FUNCTION__ @@ fun scope -> loop scope 0. + +let with_measure_memory_pages scope t ~localhost f = + Trace.with_ ~scope __FUNCTION__ @@ fun scope -> + let pages0 = stable_localhost_free_pages scope t ~host:localhost in + f () ; + let pages1 = stable_localhost_free_pages scope t ~host:localhost in + let delta = Int64.sub pages0 pages1 in + Scope.add_event scope (fun () -> + Opentelemetry.Event.make + ~attrs: + [ + ("pages0", `Int (Int64.to_int pages0)) + ; ("pages1", `Int (Int64.to_int pages1)) + ; ("delta", `Int (Int64.to_int delta)) + ] + "memory usage delta" + ) ; + delta + +let check_tasks tasks = + tasks |> List.map @@ function Ok x -> x | Error exn -> raise exn + +let prefix = "temp_quicktest" + +module NameMap = Map.Make (String) + +let ensure_vm_clones t ~vm n purpose = + Trace.with_ __FUNCTION__ ~attrs:[("purpose", `String purpose); ("n", `Int n)] + @@ fun _scope -> + let names = + List.init n (fun i -> Printf.sprintf "%s-%s-%03d" prefix purpose i) + in + (* we might want to look up hundreds of VMs, faster retrieve all and filter + locally *) + let all_vms = + call t @@ VM.get_all_records + |> List.to_seq + |> Seq.map (fun (vmref, vm) -> (vm.API.vM_name_label, (vmref, vm))) + |> NameMap.of_seq + in + let existing_vms, tasks = + names + |> List.partition_map @@ fun label -> + match NameMap.find_opt label all_vms with + | Some found -> + Either.left found + | None -> + Either.right (Api.VM.Async.clone ~vm ~new_name:label) + in + + let new_vms = + Api.batched_run_or_cancel t "Cloning new VM(s)" tasks |> check_tasks + in + + (* May have been used by a previous test: hard shutdown. + We cleanup at the beginning of a test, not the end, + because we want to make 'pause-on-fail' possible. + (e.g. it may take a while to clone and boot hundreds of VMs to trigger a + test, so we shouldn't throw that all away on an error, to make it easier + to reproduce and debug) + *) + let tasks = + existing_vms + |> List.filter_map (fun (vmref, vm) -> + if vm.API.vM_power_state <> `Halted then + Some (Api.VM.Async.hard_shutdown ~vm:vmref) + else + None + ) + in + let (_ : _ list) = + Api.batched_run_or_cancel t "Hard shutdown existing VM(s)" tasks + |> check_tasks + in + List.concat [existing_vms |> List.map fst; new_vms] + +let div_round_up a b = Int64.(div (add a @@ pred b) b) + +let ignore_list (_ : _ list) = () + +let pagesize () = Int64.shift_left (Xenctrl.pages_to_kib 1L) 10 + +let lifecycle_attrs _t items = [("n", `Int (List.length items))] + +let start_vms_parallel t host_vms = + Trace.with_ __FUNCTION__ ~attrs:(lifecycle_attrs t host_vms) @@ fun _ -> + let tasks = + host_vms + |> List.map @@ fun (host, vm) -> + Api.VM.Async.start_on ~host ~vm ~start_paused:true ~force:false + in + Api.batched_run_or_cancel t "Start VM(s)/parallel" tasks + |> check_tasks + |> ignore_list + +let hard_reboot_vms t vms = + Trace.with_ __FUNCTION__ ~attrs:(lifecycle_attrs t vms) @@ fun _ -> + let tasks = vms |> List.map @@ fun (_, vm) -> Api.VM.Async.hard_reboot ~vm in + Api.batched_run_or_cancel t "Hard reboot VMs/parallel" tasks + |> check_tasks + |> ignore_list + +let start_vm t ~host ~vm = + (* invoke the async version so we can see the progress, + starting big VMs can be slow + *) + Api.batched_run_or_cancel t "Start VM" + [Api.VM.Async.start_on ~vm ~host ~force:false ~start_paused:true] + |> check_tasks + |> fun (_ : _ list) -> () + +let start_vms_seq t host_vms = + Trace.with_ __FUNCTION__ ~attrs:(lifecycle_attrs t host_vms) @@ fun _ -> + host_vms + |> List.iter @@ fun (host, vm) -> + if call t @@ VM.get_power_state ~self:vm = `Halted then + start_vm t ~host ~vm + +let start_vms t host_vms = + Trace.with_ __FUNCTION__ @@ fun scope -> + try start_vms_parallel t host_vms + with Api_errors.Server_error _ as exn -> + let bt = Printexc.get_raw_backtrace () in + Backtrace.is_important exn ; + Scope.add_event scope (fun () -> + Opentelemetry.Event.make "Parallel start failed" + ) ; + (* Try to start the remainig VMs sequentially *) + start_vms_seq t host_vms ; + Scope.add_event scope (fun () -> + Opentelemetry.Event.make "Sequential start succeeded" + ) ; + (* raise the parallel failure *) + Printexc.raise_with_backtrace exn bt + +let workload_host' t ~host ~workload_vm = + let vm = workload_vm in + Trace.with_ __FUNCTION__ @@ fun scope -> + let host_cpus = call t @@ Host.get_host_CPUs ~self:host |> List.length in + let vcpus = + (* can't use more vCPUs than the host has, and we support a maximum of 32 + for untrusted VMs *) + min 32 host_cpus |> Int64.of_int + in + let free_mem = call t @@ Host.compute_free_memory ~host in + Api.VM.call_set t VM.set_VCPUs_max ~self:vm ~value:vcpus ; + Api.VM.call_set t VM.set_VCPUs_at_startup ~self:vm ~value:vcpus ; + (* ensure that all host CPUs are busy with at least 1 vCPU. + For simplicity we rounding up, so the last VM may actually overload the host *) + let n = div_round_up (Int64.of_int host_cpus) vcpus |> Int64.to_int in + let memory_min = Api.VM.call_get t VM.get_memory_static_min ~self:vm in + let value = + (* Try to use at least 10% of host free memory with our N workloads in + total, or N*memory_min, whichever is higher. *) + max memory_min (Int64.div (Int64.div free_mem 10L) (Int64.of_int n)) + in + Api.VM.call_set t VM.set_memory ~self:vm ~value ; + + Scope.add_attrs scope (fun () -> + [ + ("vcpus", `Int (Int64.to_int vcpus)) + ; ("memory_bytes", `Int (Int64.to_int memory_min)) + ; ("n", `Int n) + ] + ) ; + ensure_vm_clones t ~vm n (Printf.sprintf "workload-%s" @@ Ref.string_of host) + |> List.map @@ fun vm -> (host, vm) + +let run_vms t host_vms = + start_vms t host_vms ; + let tasks = + List.map + (fun (_, vm) t -> + Api.VM.task t "unpause" ignore vm @@ Async.VM.unpause ~vm + ) + host_vms + in + Api.batched_run_or_cancel t "unpause" tasks |> check_tasks |> ignore_list + +let workload t ~host ~workload_vm = + workload_host' t ~host ~workload_vm |> run_vms t + +let workload_pool t ~workload_vm = + let hosts = call t @@ Host.get_all in + hosts + |> List.concat_map (fun host -> workload_host' t ~host ~workload_vm) + |> run_vms t + +let shutdown_vms t = function + | [] -> + () + | vms -> + let tasks = vms |> List.map @@ fun vm -> Api.VM.Async.hard_shutdown ~vm in + Api.batched_run_or_cancel t "hard_shutdown VM(s)" tasks + |> check_tasks + |> ignore_list + +let fill_mem_pow2' ?total t ~host ~vm = + let memory_min = call t @@ VM.get_memory_static_min ~self:vm in + let host_free_mem = call t @@ Host.compute_free_memory ~host in + let total = Option.value total ~default:host_free_mem in + Trace.with_ __FUNCTION__ + ~attrs: + [ + ("vm_memory_min", `Int (Int64.to_int memory_min)) + ; ("host_free_bytes", `Int (Int64.to_int host_free_mem)) + ; ("total", `Int (Int64.to_int total)) + ] + @@ fun scope -> + let sizes = + Trace.with_ ~scope "compute and set VM sizes" @@ fun _scope -> + Seq.unfold + (fun total -> + let half = Int64.div total 2L in + let next = + if half < memory_min then + total + else + half + in + if next <= 0L then + None + else begin + let value = + Api.VM.with_call t "maximise_memory" vm + @@ VM.maximise_memory ~self:vm ~approximate:false ~total:next + in + if value < memory_min then + None + else begin + Api.VM.call_set t VM.set_memory ~self:vm ~value ; + let overhead = + Api.VM.with_call t "compute_memory_overhead" vm + @@ VM.compute_memory_overhead ~vm + in + Scope.add_event scope (fun () -> + Opentelemetry.Event.make + ~attrs: + [ + ("amount to fill (bytes)", `Int (Int64.to_int next)) + ; ("VM memory (bytes)", `Int (Int64.to_int value)) + ; ("computed overhead (bytes)", `Int (Int64.to_int overhead)) + ] + "try to fill host memory" + ) ; + Some (value, Int64.sub total (Int64.add value overhead)) + end + end + ) + total + |> List.of_seq + in + let vms = ensure_vm_clones t ~vm (List.length sizes) "fillmem" in + let () = + List.combine vms sizes + |> List.iter @@ fun (self, value) -> + Api.VM.call_set t VM.set_memory ~self ~value + in + vms |> List.map (fun vm -> (host, vm)) |> start_vms t ; + + Scope.add_event scope (fun () -> + Opentelemetry.Event.make "Parallel start\n succeeded" + ) ; + vms + +let fill_mem_pow2 ?total t ~host ~vm = + Trace.with_ __FUNCTION__ @@ fun scope -> + let vms = fill_mem_pow2' ?total t ~host ~vm in + Trace.with_ ~scope "Shutdown VMs on success" @@ fun _ -> shutdown_vms t vms + +let maximise_memory t ~vm ~total = + let value = + Api.VM.with_call t "maximise_memory" vm + @@ VM.maximise_memory ~self:vm ~approximate:false ~total + in + (* XS8+ bug workaround: migration double counts overhead *) + Api.VM.call_set t VM.set_memory ~self:vm ~value ; + let overhead = + Api.VM.with_call t "compute_memory_overhead" vm + @@ VM.compute_memory_overhead ~vm + in + let total = Int64.sub total overhead in + let value = + Api.VM.with_call t "maximise_memory" vm + @@ VM.maximise_memory ~self:vm ~approximate:false ~total + in + value + +let fill_mem_n ?(workaround_migration = false) ?total t ~host ~vm ~n = + Trace.with_ __FUNCTION__ @@ fun scope -> + assert (n > 0) ; + let host_free_mem = call t @@ Host.compute_free_memory ~host in + let total = Option.value total ~default:host_free_mem in + let value = + (* rounded down, will fill remainder below in last_value *) + let total = Int64.div total (Int64.of_int n) in + maximise_memory t ~vm ~total + in + + let last_value = + Api.VM.call_set t VM.set_memory ~self:vm ~value ; + let overhead = + Api.VM.with_call t "compute_memory_overhead" vm + @@ VM.compute_memory_overhead ~vm + in + let overhead = + if workaround_migration then + Int64.mul 2L overhead + else + overhead + (* XS8+ bug: double counts overhead for migration *) + in + (* division may not be exact, fill remainder *) + let total = + Int64.(sub total @@ mul (add value overhead) @@ Int64.of_int @@ (n - 1)) + in + maximise_memory t ~vm ~total + in + let sizes = + List.init n @@ fun i -> + if i = n - 1 then + last_value + else + value + in + let vms = + ensure_vm_clones t ~vm (List.length sizes) (Printf.sprintf "fillmem-%d" n) + in + let full_sizes = + List.combine vms sizes + |> List.map @@ fun (self, value) -> + Api.VM.call_set t VM.set_memory ~self ~value ; + let overhead = + Api.VM.with_call t "compute_memory_overhead" vm + @@ VM.compute_memory_overhead ~vm:self + in + Int64.(add value overhead) + in + let sum = List.fold_left Int64.add 0L sizes in + let sum_full_sizes = List.fold_left Int64.add 0L full_sizes in + Scope.add_event scope (fun () -> + Opentelemetry.Event.make "fill_mem_n_sizes" + ~attrs: + [ + ("host_free_memory_bytes", `Int (Int64.to_int host_free_mem)) + ; ("total", `Int (Int64.to_int total)) + ; ("sum", `Int (Int64.to_int sum)) + ; ("n", `Int n) + ; ("vm_memory_bytes", `Int (Int64.to_int value)) + ; ("last_vm_memory_bytes", `Int (Int64.to_int last_value)) + ; ("sum", `Int (Int64.to_int sum)) + ; ("sum_full_sizes", `Int (Int64.to_int sum_full_sizes)) + ; ( "host_free_remaining_bytes" + , `Int Int64.(sub host_free_mem sum_full_sizes |> to_int) + ) + ] + ) ; + + let host_vms = vms |> List.map (fun vm -> (host, vm)) in + start_vms t host_vms ; + let host_free_mem = call t @@ Host.compute_free_memory ~host in + let actual_free_mem = Int64.mul (pagesize ()) @@ localhost_free_pages scope in + Scope.add_event scope (fun () -> + Opentelemetry.Event.make "afterfill_mem" + ~attrs: + [ + ("host_free_mem", `Int (Int64.to_int host_free_mem)) + ; ("actual_free_mem", `Int (Int64.to_int actual_free_mem)) + ] + ) ; + host_vms + +let cleanup rpc session_id () = + Trace.with_ __FUNCTION__ @@ fun _ -> + let t = {rpc= RPC.wrap rpc; session_id} in + let vms = call t @@ VM.get_all_records in + let vms = + vms + |> List.filter (fun (_, vm) -> + String.starts_with ~prefix vm.API.vM_name_label + ) + in + let not_halted = + vms + |> List.filter_map @@ fun (self, vm) -> + if vm.API.vM_power_state <> `Halted then + Some self + else + None + in + shutdown_vms t not_halted ; + vms + |> List.iter @@ fun (self, _) -> + (* TODO: vm-uninstall instead? but it is slow, and we have no disks *) + Api.VM.with_call t "destroy" self @@ VM.destroy ~self diff --git a/ocaml/quicktest/quicktest_api_helpers.mli b/ocaml/quicktest/quicktest_api_helpers.mli new file mode 100644 index 00000000000..a3a72f3d8f4 --- /dev/null +++ b/ocaml/quicktest/quicktest_api_helpers.mli @@ -0,0 +1,136 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val points_between : int64 -> int64 -> int64 Seq.t +(** [points_between a b] generate a sequence of test values between [a, b]. + Currently this generates [a*2**i], and always includes [b] exactly. +*) + +val localhost_free_pages : Quicktest_trace.Scope.t -> int64 +(** [localhost_free_pages scope] asks Xen for the actual free pages on the +local host. *) + +val stable_localhost_free_pages : + Quicktest_trace.Scope.t -> Client.Client.client -> host:API.ref_host -> int64 +(** [stable_localhost_free_pages scope client ~host] + waits until there no active tasks on [host], and then repeatedly queries + [localhost_free_pages] with a delay inbetween until it stabilizes. +*) + +val with_measure_memory_pages : + Quicktest_trace.Scope.t + -> Client.Client.client + -> localhost:API.ref_host + -> (unit -> unit) + -> int64 +(** [with_measure_memory_pages scope client ~localhost f] + measures the amount of actual free memory on the host + before and after calling [f ()], and returns the difference. +*) + +val check_tasks : ('a, exn) result list -> 'a list +(** [check_tasks tasks] raises an exception if [tasks] contains errors. *) + +val ensure_vm_clones : + Client.Client.client + -> vm:Quicktest_trace_api__Api.VM.dbref + -> int + -> string + -> [`VM] API.Ref.t list +(** [ensure_vm_clones client ~vm n purpose] ensures that there are at least [n] +clones of [vm] for [purpose]. + This creates new VM clones as needed, or reuses existing ones. + It ensures that existing VMs are halted. +*) + +val pagesize : unit -> int64 +(** [pagesize ()] the CPU's pagesize. On x86-64 this is always 4096. *) + +val start_vm : + Client.Client.client + -> host:[< `host] Ref.t + -> vm:Quicktest_trace_api__Api.VM.dbref + -> unit +(** [start_vm client ~host ~vm] starts a single [vm] on [host], tracking progress. *) + +val start_vms : + Client.Client.client -> (API.ref_host * [`VM] API.Ref.t) list -> unit +(** [start_vms client host_vms] starts [vm]s on [host], where [host_vms] is + a list of [host], [vm] pairs; and track progress. + It attempts to perform a parallel start first, and if that fails, + it attempts to start any remaining VMs sequentially. + If the parallel start fails this always raises an exception at the end. +*) + +val hard_reboot_vms : + Client.Client.client -> (API.ref_host * [`VM] API.Ref.t) list -> unit +(** [hard_reboot_vms client host_vms] hard reboots all [vms]. +*) + +val shutdown_vms : + Client.Client.client -> Quicktest_trace_api__Api.VM.dbref list -> unit +(** [shutdown_vms client vms] hard shutdowns [vms], tracking progress. *) + +val fill_mem_pow2' : + ?total:int64 + -> Client.Client.client + -> host:[`host] API.Ref.t + -> vm:[`VM] API.Ref.t + -> API.ref_VM list + +val fill_mem_pow2 : + ?total:int64 + -> Client.Client.client + -> host:[`host] API.Ref.t + -> vm:[`VM] API.Ref.t + -> unit +(** [fill_mem_pow2 ?total client ~host ~vm] fills the available memory on [host] in + power of 2 increments, trying to ensure that the computed VM sizes sum up to exactly + the amount of available free memory on the host, including VM memory overhead. + It may not completely fill available memory due to rounding. +*) + +val workload : + Client.Client.client -> host:API.ref_host -> workload_vm:API.ref_VM -> unit +(** [workload client ~host ~workload_vm] fills all CPUs in [host] with [workload_vm]. + The VMs will use a small amount of memory, but together they will use all + CPUs on a host. (due to configuration limits we can't always create a single + VM to fill the entire host) +*) + +val workload_pool : Client.Client.client -> workload_vm:API.ref_VM -> unit +(** [workload_pool client ~workload_vm] is like {!val:workload}, but starts the VMs on all hosts in the pool *) + +val fill_mem_n : + ?workaround_migration:bool + -> ?total:int64 + -> Client.Client.client + -> host:[`host] API.Ref.t + -> vm:[`VM] API.Ref.t + -> n:int + -> (API.ref_host * API.ref_VM) list +(** [fill_mem_n client ~host ~vm ~n] fills the available memory on [host] with + [n] VMs of approximatively equal sizes, trying to ensure that the computed VM sizes sum up to exactly + the amount of available free memory on the host, including VM memory overhead. + It may not completely fill available memory due to rounding. + The VMs are started prior to returning from this function +*) + +val cleanup : (Rpc.call -> Rpc.response) -> API.ref_session -> unit -> unit +(** [cleanup rpc session ()] hard shutdowns all VMs used by these tests, + and deletes the clones. + + For pause on fail to work, this should be a separate quicktest invoked before + VM calibration tests. +*) diff --git a/ocaml/quicktest/quicktest_args.ml b/ocaml/quicktest/quicktest_args.ml index e8ed3442943..a46f1d76e71 100644 --- a/ocaml/quicktest/quicktest_args.ml +++ b/ocaml/quicktest/quicktest_args.ml @@ -22,7 +22,16 @@ let using_unix_domain_socket = ref true let http = Xmlrpc_client.xmlrpc ~version:"1.1" "/" +let update_http http = + let headers = Quicktest_trace_rpc.RPC.http_headers () in + Http.Request. + { + http with + additional_headers= List.rev_append headers http.additional_headers + } + let rpc_remote xml = + let http = update_http http in Xmlrpc_client.XMLRPC_protocol.rpc ~srcstr:"quicktest" ~dststr:"xapi" ~transport: (SSL @@ -34,6 +43,7 @@ let rpc_remote xml = ~http xml let rpc_unix_domain xml = + let http = update_http http in Xmlrpc_client.XMLRPC_protocol.rpc ~srcstr:"quicktest" ~dststr:"xapi" ~transport:(Unix Xapi_globs.unix_domain_socket) ~http xml diff --git a/ocaml/quicktest/quicktest_vm_calibrate.ml b/ocaml/quicktest/quicktest_vm_calibrate.ml new file mode 100644 index 00000000000..12d60c7ea61 --- /dev/null +++ b/ocaml/quicktest/quicktest_vm_calibrate.ml @@ -0,0 +1,530 @@ +(* ++ * Copyright (c) Cloud Software Group, Inc ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU Lesser General Public License as published ++ * by the Free Software Foundation; version 2.1 only. with the special ++ * exception on linking described in file LICENSE. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU Lesser General Public License for more details. ++ *) + +open Quicktest_api_helpers +open Client.Client +open Quicktest_trace +open Quicktest_trace_api +open Quicktest_trace_rpc + +let div_round_up a b = Int64.(div (add a @@ pred b) b) + +module type Variable = sig + (** type of the variable *) + type t + + val name : string + (** name of the variable *) + + val set : client -> vm:API.ref_VM -> t -> unit + (** [set client ~vm value] API call that sets the variable on [vm] to [value] *) + + val values : client -> host:API.ref_host -> vm:API.ref_VM -> t Seq.t + (** [values client ~vm] is a sequence of valid values that + can be used to calibrate the memory overhead for this variable. + *) + + val to_int64 : t -> int64 + (** [to_int64 t] is the value of [t] as an integer that is used in memory + overhead calculation. + This is not necessarily the same as the raw value used by [set]. + *) + + val pp : t Fmt.t + (** [pp ppf t] pretty prints the value of the variable [t] *) +end + +let mib n = Int64.shift_left n 20 + +let bytes_to_pages bytes = Int64.div bytes @@ pagesize () + +let max_vms_per_host = 1000 + +let try_to_trigger_failure (type a) t ~host ~vm + (module V : Variable with type t = a) (x : a) vms = + if not !Quicktest_args.skip_stress then begin + V.set t ~vm x ; + let overhead = call t @@ VM.compute_memory_overhead ~vm + and vm_mem = Api.VM.call_get t VM.get_memory_dynamic_min ~self:vm in + let vm_total_mem = Int64.add overhead vm_mem in + let free_mem = call t @@ Host.compute_free_memory ~host in + let max_vms = Int64.div free_mem vm_total_mem |> Int64.to_int in + (* not too many, we cannot have more VMs than would fit on the host, + or more VMs than supported/host + *) + let max_vms = min (min vms max_vms) max_vms_per_host in + if max_vms >= vms then begin + Trace.with_ "Creating VMs" ~attrs:[("count", `Int max_vms)] @@ fun _ -> + let vms = ensure_vm_clones t ~vm max_vms (V.name ^ "-trigger") in + vms |> List.map (fun vm -> (host, vm)) |> start_vms t ; + fill_mem_pow2 t ~host ~vm ; + shutdown_vms t vms + end + (* can't create enough VMs to trigger the failure *) + end + +module Iterations = struct + type t = float + + let to_float = Fun.id +end + +module P = Cli_progress_bar.Make (Iterations) + +let fill_mem_test rpc session_id template () = + Trace.with_ __FUNCTION__ @@ fun _ -> + let t = {rpc= RPC.wrap ~log_body:true rpc; session_id} in + let host = call t @@ Host.get_by_uuid ~uuid:Qt.localhost_uuid in + Qt.VM.with_new rpc session_id ~template @@ fun vm -> fill_mem_pow2 t ~host ~vm + +let required_stable_iterations = 10 + +let iterations_limit = 100 + +let accepted_leak = mib 1L + +let host_mem_leak rpc session_id template () = + Trace.with_ __FUNCTION__ @@ fun scope -> + let t = {rpc= RPC.wrap ~log_body:true rpc; session_id} in + let host = call t @@ Host.get_by_uuid ~uuid:Qt.localhost_uuid in + Qt.VM.with_new rpc session_id ~template @@ fun vm -> + Api.VM.call_set t VM.set_memory ~self:vm ~value:(Int64.shift_left 1L 30) ; + let p = P.create 80 0. 1. in + let pages00 = stable_localhost_free_pages scope t ~host in + + let rec loop stable i = + Api.VM.with_call t "assert_can_boot_here" vm + @@ VM.assert_can_boot_here ~self:vm ~host ; + Scope.add_event scope (fun () -> + Opentelemetry.Event.make "assert_can_boot_here succeeded" + ) ; + + let pages0 = stable_localhost_free_pages scope t ~host in + + Api.VM.with_call t "start_on" vm + @@ VM.start_on ~vm ~host ~force:false ~start_paused:true ; + Api.VM.with_call t "hard_shutdown" vm @@ VM.hard_shutdown ~vm ; + + let pages1 = stable_localhost_free_pages scope t ~host in + let leak = Int64.sub pages0 pages1 in + Scope.add_event scope (fun () -> + Opentelemetry.Event.make + ~attrs: + [ + ("pages before", `Int (Int64.to_int pages0)) + ; ("pages after", `Int (Int64.to_int pages1)) + ; ("pages leak", `Int (Int64.to_int leak)) + ] + "start/shutdown delta" + ) ; + let iterations = + if leak > 0L then + Int64.div pages1 leak |> Int64.to_int + else + 0 + in + let iterations_done = i in + let iterations_total = + iterations_done + iterations + (required_stable_iterations - stable) + in + if P.update p (float_of_int iterations_done /. float_of_int iterations_total) + then + Printf.eprintf "\r%s%!" P.(string_of_bar p) ; + let total_leak = Int64.sub pages00 pages1 in + if i > iterations_limit then begin + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Severity_number_info + "Total memory leak after %d iterations is %Ld pages" i total_leak + ] + ) ; + if total_leak < accepted_leak then + () + else + (* it may take a long time to + actually OOM the host, but we've already proven the leak, so stop, + otherwise the test will timeout! *) + failwith "Host memory leak found!" + end else if iterations > 0 then begin + let () = + Scope.add_log scope @@ fun () -> + Opentelemetry.Logs.make_strf "Iterations to OOM: %d" iterations + in + (* we'll either OOM or stabilize (if it isn't a real leak, just a bounded + growth) *) + loop 0 (i + 1) + end else if stable < required_stable_iterations then + loop (stable + 1) (i + 1) + else begin + Printf.eprintf "\r%s\r%s%!" (String.make p.width ' ') P.(summarise p) ; + if i > stable then + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Severity_number_info + "Apparent memory leak stopped after %d iterations, total \ + apparent leak = %Ld pages" + i total_leak + ] + ) + end + in + loop 0 0 + +let boot1 rpc session_id template (module V : Variable) () = + let t = {rpc= RPC.wrap ~log_body:true rpc; session_id} in + let host = call t @@ Host.get_by_uuid ~uuid:Qt.localhost_uuid in + Qt.VM.with_new rpc session_id ~template @@ fun vm -> + let value = V.values t ~host ~vm |> List.of_seq |> List.rev |> List.hd in + (* set to largest: highest chance to find a bug *) + V.set t ~vm value ; + let total = call t @@ Host.compute_free_memory ~host in + let value = call t @@ VM.maximise_memory ~self:vm ~approximate:false ~total in + Trace.with_ __FUNCTION__ + ~attrs: + [ + ("vm_memory", `Int (Int64.to_int value)) + ; ("fill_memory", `Int (Int64.to_int total)) + ] + @@ fun scope -> + Api.VM.call_set t VM.set_memory ~self:vm ~value ; + + Api.VM.with_call t "assert_can_boot_here" vm + @@ VM.assert_can_boot_here ~self:vm ~host ; + Scope.add_event scope (fun () -> + Opentelemetry.Event.make "assert_can_boot_here succeeded" + ) ; + + start_vm t ~host ~vm ; + + Api.VM.with_call t "hard_shutdown" vm @@ VM.hard_shutdown ~vm + +let calibrate rpc session_id template (module V : Variable) () = + Trace.with_ __FUNCTION__ @@ fun scope -> + let t = {rpc= RPC.wrap ~log_body:true rpc; session_id} in + let host = call t @@ Host.get_by_uuid ~uuid:Qt.localhost_uuid in + Qt.VM.with_new rpc session_id ~template @@ fun vm -> + (* start with a small VM, [module V] can override it *) + let memory_min = Api.VM.call_get t VM.get_memory_static_min ~self:vm in + Api.VM.call_set t VM.set_memory ~self:vm ~value:memory_min ; + + let actual_free_pages = stable_localhost_free_pages scope t ~host in + let computed_free_pages = + call t @@ Host.compute_free_memory ~host |> bytes_to_pages + in + let headroom_pages = Int64.sub actual_free_pages computed_free_pages in + (* If XAPI overestimates there is this much extra space that can be used to + cover for any underestimates. + If this is negative then we can already run the system out of memory. + *) + Scope.add_event scope (fun () -> + Opentelemetry.Event.make + ~attrs: + [ + ("actual_free_pages_xen", `Int (Int64.to_int actual_free_pages)) + ; ("computed_free_pages_xapi", `Int (Int64.to_int computed_free_pages)) + ; ("free_headroom_pages", `Int (Int64.to_int headroom_pages)) + ] + "host_free_mem" + ) ; + if headroom_pages < 0L then begin + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Logs.Severity_number_warn + "XAPI has already underestimated free memory: %Ld" headroom_pages + ] + ) ; + Scope.set_status scope + Span_status.( + make ~message:"free memory underestimated" ~code:Status_code_error + ) + end ; + + let measure_overhead_pages value = + V.set t ~vm value ; + let pages = + with_measure_memory_pages scope t ~localhost:host @@ fun () -> + let (_ : _ list) = + Api.batched_run_or_cancel t "Start VM(s)/measure" + [Api.VM.Async.start_on ~vm ~host ~force:false ~start_paused:true] + |> check_tasks + in + () + in + let memory_target_pages = + call t @@ VM.get_memory_target ~self:vm |> bytes_to_pages + and memory_overhead_pages_xapi = + call t @@ VM.get_memory_overhead ~self:vm |> bytes_to_pages + in + Api.VM.with_call t "hard_shutdown" vm @@ VM.hard_shutdown ~vm ; + let overhead_pages = Int64.sub pages memory_target_pages in + Scope.add_event scope (fun () -> + Opentelemetry.Event.make + ~attrs: + [ + ("variable", `String (Fmt.to_to_string V.pp value)) + ; ("unit", `String V.name) + ; ("pages", `Int (Int64.to_int pages)) + ; ("memory_target_pages", `Int (Int64.to_int pages)) + ; ("overhead_pages", `Int (Int64.to_int overhead_pages)) + ; ( "xapi_estimated_pages" + , `Int (Int64.to_int memory_overhead_pages_xapi) + ) + ] + "overhead" + ) ; + let diff = Int64.sub overhead_pages memory_overhead_pages_xapi in + let vms_required = + if diff > 0L then begin + let vms = div_round_up headroom_pages diff in + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Severity_number_warn + "Memory overhead was underestimated by XAPI: %Ld < %Ld, diff: \ + %Ld.@,\ + VMs required for failure: %Ld" + memory_overhead_pages_xapi overhead_pages diff vms + ] + ) ; + Scope.set_status scope + Span_status.( + make ~message:"memory overhead underestimated" + ~code:Status_code_error + ) ; + vms + end else + Int64.max_int + in + (value, overhead_pages, vms_required) + in + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Severity_number_info + "Measuring VM %S impact on memory usage" V.name + ] + ) ; + let overhead_pages = + V.values t ~host ~vm |> Seq.map measure_overhead_pages |> List.of_seq + in + match overhead_pages with + | [] -> + assert false + | (x0, y0, vms0) :: rest -> + let x0 = V.to_int64 x0 in + let deltas = + rest + |> List.map @@ fun (x, y, vms) -> + let x = V.to_int64 x in + let x = Int64.sub x x0 and y = Int64.sub y y0 in + let ratio = + if x > 0L then + Int64.to_float y /. Int64.to_float x + else + 0. + in + (x, y, ratio, vms) + in + Opentelemetry.( + Logs.emit + (Logs.make_strf ~severity:Severity_number_info + "%s,memory_overhead_pages,coeff,vms" V.name + :: (deltas + |> List.map @@ fun (x, y, r, vms) -> + Logs.make_strf ~severity:Severity_number_info "%Ld,%Ld,%g,%Ld" x + y r vms + ) + ) + ) ; + let max_coeff = + List.fold_left (fun rmax (_, _, r, _) -> Float.max rmax r) 0. deltas + and min_vms = + List.fold_left + (fun rmin (_, _, _, vms) -> Int64.min rmin vms) + vms0 deltas + in + let op, max_coeff_int = + if Float.round max_coeff >= 1. then + ("*", Float.ceil max_coeff |> Int64.of_float) + else + ("/", Float.floor (1. /. max_coeff) |> Int64.of_float) + in + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Logs.Severity_number_info2 + "VM memory_overhead_pages = ... + %s * %g =~ ... + %s %s %Ld" + V.name max_coeff V.name op max_coeff_int + ] + ) ; + if min_vms < Int64.max_int then begin + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Logs.Severity_number_warn + "With %Ld VMs it might be possible to trigger OOM\n\ + \ error" + min_vms + ] + ) ; + if min_vms <= 1000L then + Scope.set_status scope + Span_status.( + make ~message:"free memory\n underestimated" + ~code:Status_code_error + ) ; + let x, _, _ = overhead_pages |> List.rev |> List.hd in + try_to_trigger_failure t ~host ~vm (module V) x (Int64.to_int min_vms) + end + +module VCPU = struct + type t = int64 + + let to_int64 = Fun.id + + let name = "vcpu" + + let pp = Fmt.int64 + + let set t ~vm value = + (* API is weird, there is no order of the 2 set calls that works for all + values, must satisfy [0 < vCPUs_at_startup <= vCPUs_max], + but there is no VM.set_vcpu call like VM.set_memory to set both in an + atomic call. + *) + let old = Api.VM.call_get t VM.get_VCPUs_at_startup ~self:vm in + if old <= value then begin + (* increasing number of vCPUs: change max first *) + Api.VM.call_set t VM.set_VCPUs_max ~self:vm ~value ; + Api.VM.call_set t VM.set_VCPUs_at_startup ~self:vm ~value + end else begin + (* decreasing number of vcpus: change on startup first *) + Api.VM.call_set t VM.set_VCPUs_at_startup ~self:vm ~value ; + Api.VM.call_set t VM.set_VCPUs_max ~self:vm ~value + end + + let values t ~host ~vm:_ = + let limit = + let host_cpus = call t @@ Host.get_host_CPUs ~self:host |> List.length in + (* can't use more vCPUs than the host has, and we support a maximum of 64 *) + min 64 host_cpus + in + let all = Seq.init limit (( + ) 1) |> Seq.map Int64.of_int in + (* There is some non-determinism (due to alignment/fragmentation?), so test + all values that we can, and then repeat it all again. + This should allow us to find a better maximum. + *) + Seq.append all all + (* points_between 1L limit *) +end + +module Pagetables = struct + type t = MiB of int64 + + let name = "pagetables" + + let to_bytes (MiB mib) = Int64.shift_left mib 20 + + let entries_in_pagetable () = + (* 4096 / 8 = 512 on x86-64 *) + Int64.div (pagesize ()) (Sys.word_size / 8 |> Int64.of_int) + + let to_int64 t = + let ( ++ ) = Int64.add in + let bytes = to_bytes t in + let pagesize = pagesize () in + let entries_in_pagetable = entries_in_pagetable () in + let pages = Int64.div bytes pagesize in + (* pagetables for ranges of 2MiB, 1GiB, 512GiB, 256TiB, see + https://wiki.osdev.org/Page_Tables#48-bit_virtual_address_space *) + let pt = div_round_up pages entries_in_pagetable in + let pd = div_round_up pt entries_in_pagetable in + let pdp = div_round_up pd entries_in_pagetable in + let pml4 = div_round_up pdp entries_in_pagetable in + (* for future-proofing, we don't actually support this much memory *) + let pml5 = div_round_up pml4 entries_in_pagetable in + (* Assuming that you'd have to allocate as 1GiB pages initially, + then shatter them to 2MiB pages, and then finally to 4KiB pages. + *) + let pagetables_4k = pt ++ pd ++ pdp ++ pml4 ++ pml5 + and pagetables_2m = pd ++ pdp ++ pml4 ++ pml5 + and pagetables_1g = pdp ++ pml4 ++ pml5 in + Opentelemetry.( + Logs.emit + [ + Logs.make_strf ~severity:Severity_number_debug + "%Ld bytes = %Ld pages;%Ld PT, %Ld PD, %Ld PDP, %Ld PML4, %Ld \ + PML5;%Ld pagesize pagetables = %Ld;%Ld pagesize pagetables = \ + %Ld;%Ld pagesize pagetables = %Ld" + bytes pages pt pd pdp pml4 pml5 pagesize pagetables_4k + Int64.(mul pagesize entries_in_pagetable) + pagetables_2m + Int64.(mul (mul pagesize entries_in_pagetable) entries_in_pagetable) + pagetables_1g + ] + ) ; + Int64.(pt ++ mul 2L pd ++ mul 3L pdp ++ mul 3L pml4 ++ mul 3L pml5) + + let of_bytes bytes = MiB (Int64.shift_right bytes 20) + + let set t ~vm mib = call t @@ VM.set_memory ~self:vm ~value:(to_bytes mib) + + let pp ppf (MiB mib as t) = + Fmt.pf ppf "%Ld pagetables (for %Ld pages = %Ld MiB)" (to_int64 t) + (Int64.div (to_bytes t) @@ pagesize ()) + mib + + let values t ~host ~vm = + let total = call t @@ Host.compute_free_memory ~host in + let value_max = + call t @@ VM.maximise_memory ~total ~approximate:false ~self:vm + in + let start = + [27; 28; 29; 30; 31; 39; 40] + |> List.to_seq + |> Seq.map (Int64.shift_left 1L) + |> Seq.filter (fun v -> v < value_max) + in + Seq.append start Seq.(return value_max) |> Seq.map of_bytes +end + +let specialise (name, speed, test) (module V : Variable) = + let name = Printf.sprintf "%s: %s" name V.name in + (name, speed, test (module V : Variable)) + +let variables = [(module VCPU : Variable); (module Pagetables : Variable)] + +let tests_cleanup () = + let open Qt_filter in + [("Cleanup", `Slow, cleanup)] |> conn + +let tests () = + let open Qt_filter in + List.concat + [ + [("Fill mem 1VM", `Slow, boot1); ("VM memory overhead", `Slow, calibrate)] + |> conn + |> vm_template Qt.VM.Template.other + |> List.concat_map (fun tc -> List.map (specialise tc) variables) + ; [ + ("Fill mem pow2", `Slow, fill_mem_test) + ; ("Host memory leak", `Slow, host_mem_leak) + ; ("Fill mem pow2", `Slow, fill_mem_test) + ] + |> conn + |> vm_template Qt.VM.Template.other + ] diff --git a/ocaml/quicktest/quicktest_vm_lifecycle.ml b/ocaml/quicktest/quicktest_vm_lifecycle.ml index b3de6b5b309..5ad1adf434d 100644 --- a/ocaml/quicktest/quicktest_vm_lifecycle.ml +++ b/ocaml/quicktest/quicktest_vm_lifecycle.ml @@ -91,11 +91,29 @@ let one rpc session_id vm test = | Halted -> wait_for_domid (fun domid' -> domid' = -1L) -let test rpc session_id sr_info vm_template () = +let test rpc session_id sr_info vm_template iso_info () = let sr = sr_info.Qt.sr in - Qt.VM.with_new rpc session_id ~template:vm_template ~sr (fun vm -> - List.iter (one rpc session_id vm) all_possible_tests - ) + let expr = + Printf.sprintf {|field "SR" = "%s"|} (Ref.string_of iso_info.Qt.sr) + in + let prefix = "memtest" in + let isos = + Client.Client.VDI.get_all_records_where ~rpc ~session_id ~expr + |> List.filter (fun (_, iso) -> + String.starts_with ~prefix iso.API.vDI_name_label + ) + |> List.sort (fun (_, a) (_, b) -> + -String.compare a.API.vDI_name_label b.API.vDI_name_label + ) + in + match isos with + | [] -> + Printf.eprintf "No ISO found with prefix %S\n%!" prefix + | (_, iso) :: _ -> + Printf.eprintf "Choosing ISO %S\n%!" iso.API.vDI_name_label ; + Qt.VM.with_new rpc session_id ~template:vm_template ~iso ~sr (fun vm -> + List.iter (one rpc session_id vm) all_possible_tests + ) let tests () = let open Qt_filter in @@ -103,6 +121,7 @@ let tests () = [("VM lifecycle tests", `Slow, test)] |> conn |> sr SR.(all |> allowed_operations [`vdi_create]) - |> vm_template "CoreOS" + |> vm_template Qt.VM.Template.other + |> sr SR.(all |> is_iso) ] |> List.concat diff --git a/ocaml/quicktest/quicktest_vm_memory.ml b/ocaml/quicktest/quicktest_vm_memory.ml new file mode 100644 index 00000000000..116403cedcf --- /dev/null +++ b/ocaml/quicktest/quicktest_vm_memory.ml @@ -0,0 +1,114 @@ +open Quicktest_api_helpers +open Client.Client +open Quicktest_trace +open Quicktest_trace_api +open Quicktest_trace_rpc + +let all_possible_tests = [16; 1; 3; 8] + +let check_tasks tasks = + tasks |> List.map @@ function Ok x -> x | Error exn -> raise exn + +let one t ~host ~vm ~workload_vm n = + Trace.with_ __FUNCTION__ @@ fun scope -> + workload t ~host ~workload_vm ; + let vms = fill_mem_n t ~workaround_migration:true ~host ~vm ~n in + + let migration_host, migration_vm = List.nth vms 0 in + + if n > 1 then begin + (* shutdown one to make room *) + let ((_, vm_for_shutdown) as host_vm_shutdown) = + List.nth (vms |> List.tl) 0 + in + shutdown_vms t [vm_for_shutdown] ; + + Api.VM.with_call t "unpause" migration_vm + @@ Client.Client.VM.unpause ~vm:migration_vm ; + + let () = + Trace.with_ ~scope "localhost migrate1" @@ fun _ -> + let task t = + Api.VM.task t "localhost migrate" ignore migration_vm + @@ Async.VM.pool_migrate ~vm:migration_vm + ~options:[("force", "true")] + ~host:migration_host + in + let (_ : _ list) = + Api.batched_run_or_cancel t "localhost migrate" [task] + in + () + in + + (* start it up again *) + start_vms t [host_vm_shutdown] ; + + Api.VM.with_call t "pause" migration_vm + @@ Client.Client.VM.pause ~vm:migration_vm + end ; + + let () = + match call t @@ Client.Client.Host.get_all |> List.filter (( <> ) host) with + | [] -> + () (* not in a pool: SKIP *) + | other :: _ -> + let tasks = + List.map + (fun (_, vm) t -> + Api.VM.task t "unpause" ignore vm @@ Async.VM.unpause ~vm + ) + vms + in + let (_ : unit list) = + Api.batched_run_or_cancel t "unpause" tasks |> check_tasks + in + let tasks = + List.map + (fun (_, vm) t -> + Api.VM.task t "pool migrate" ignore vm + @@ Client.Client.Async.VM.pool_migrate ~host:other ~vm + ~options:[("force", "true")] + ) + vms + in + let (_ : unit list) = + Api.batched_run_or_cancel t "pool migrate" tasks |> check_tasks + in + + let tasks = + List.map + (fun (host, vm) t -> + Api.VM.task t "pool migrate back" ignore vm + @@ Client.Client.Async.VM.pool_migrate ~host ~vm + ~options:[("force", "true")] + ) + vms + in + let (_ : unit list) = + Api.batched_run_or_cancel t "pool migrate back" tasks |> check_tasks + in + () + in + + hard_reboot_vms t [(migration_host, migration_vm)] ; + + hard_reboot_vms t vms ; + + shutdown_vms t (List.map snd vms) + +let test rpc session_id template iso () = + let t = {rpc= RPC.wrap ~log_body:true rpc; session_id} in + let host = call t @@ Host.get_by_uuid ~uuid:Qt.localhost_uuid in + Qt.VM.with_new rpc session_id ~template ~iso @@ fun workload_vm -> + Qt.VM.with_new rpc session_id ~template ~iso @@ fun vm -> + List.iter (one t ~host ~vm ~workload_vm) all_possible_tests + +let tests () = + let open Qt_filter in + [ + [("VM memory tests", `Slow, test)] + |> conn + |> vm_template Qt.VM.Template.other + |> memtest_iso ?prefix:None + ] + |> List.concat diff --git a/ocaml/quicktest/trace/api/api.ml b/ocaml/quicktest/trace/api/api.ml new file mode 100644 index 00000000000..1ca2e6a9ee6 --- /dev/null +++ b/ocaml/quicktest/trace/api/api.ml @@ -0,0 +1,422 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +open Quicktest_trace +open Quicktest_trace_rpc +open Client.Client +open Types + +module Task = struct + type 'a t = { + task: API.ref_task + ; scope: Scope.t + ; log_obj: client -> Scope.t -> unit + ; of_rpc: Rpc.t -> 'a + ; finally: (unit, string * Printexc.raw_backtrace) result -> unit + ; mutable finished: ('a, exn) result option + } + + let task t = t.task + + let v name of_rpc log_obj f = + (* TODO: should mark span kind as async, but not supported yet *) + let thunk, finally = Trace.with_' name @@ fun scope -> (scope, f ()) in + let scope, task = thunk () in + {task; scope; log_obj; of_rpc; finally; finished= None} + + let try_cancel t task = + try + Trace.with_ ~scope:task.scope "Task.try_cancel" @@ fun _ -> + let self = task.task in + let allowed = call t @@ Task.get_allowed_operations ~self in + if List.mem `cancel allowed then call t @@ Task.cancel ~task:task.task + with Api_errors.Server_error _ -> + (* race: ignore errors, may have terminated on its own already *) + () + + (*let destroy t task = + try + let span_id = Scope.span_id task.scope + and trace_id = Scope.trace_id task.scope in + (* the scope has finished, so link to it instead of creating a child trace *) + let links = [Opentelemetry.Span_link.make ~trace_id ~span_id ()] in + Trace.with_ ~links "Task.destroy" @@ fun _ -> + call t @@ Task.destroy ~self:task.task + with Api_errors.Server_error _ -> + (* task may have been GC-ed already, error logged by Trace.with_ above *) + ()*) + + (* for remote calls the local callstack is not interesting on the remote span. *) + let no_bt = Printexc.get_callstack 0 + + let on_progress task progress = + Scope.add_metrics task.scope @@ fun () -> + Opentelemetry.Metrics.(gauge ~name:"task.progress" [float progress]) + + let has_result task = Option.is_some task.finished + + let result t task = + match task.finished with + | Some r -> + r + | None -> + Trace.with_ ~scope:task.scope "Task.result" @@ fun scope -> + let remote_task = task.task in + let status = call t @@ Task.get_status ~self:remote_task in + let outcome = + match status with + | `cancelling | `cancelled -> + Error + (Api_errors.Server_error + (Api_errors.task_cancelled, [Ref.string_of remote_task]) + ) + | `pending -> + Error + (Failure + "wait_for_task_completion failed; task is still\n\ + \ pending" + ) + | `success -> + call t @@ Task.get_result ~self:remote_task + |> Xmlrpc.of_string + |> task.of_rpc + |> Result.ok + | `failure -> + let exn = + try + let error_info = + call t @@ Task.get_error_info ~self:remote_task + in + let trace = call t @@ Task.get_backtrace ~self:remote_task in + let exn = + match error_info with + | code :: params -> + Api_errors.Server_error (code, params) + | [] -> + Failure + (Printf.sprintf + "Task failed but no error recorded: %s" + (Ref.string_of remote_task) + ) + in + let bt = + Backtrace.(t_of_sexp (Sexplib.Sexp.of_string trace)) + in + Scope.add_attrs scope (fun () -> + [ + ( "exception.stacktrace" + , `String (Backtrace.to_string_hum bt) + ) + ] + ) ; + task.log_obj t scope ; + task.finally (Error (Printexc.to_string exn, no_bt)) ; + exn + with exn -> + Backtrace.is_important exn ; + let bt = Printexc.get_raw_backtrace () in + task.finally (Error (Printexc.to_string exn, no_bt)) ; + Printexc.raise_with_backtrace exn bt + in + Error exn + in + let () = + task.finally + @@ + match outcome with + | Ok _ -> + Ok () + | Error e -> + Error (Printexc.to_string e, no_bt) + in + task.finished <- Some outcome ; + outcome +end + +module Object (O : OBJECT) = struct + include O + + let log t scope ~self = + (* best-effort: try to log the object status after the failed call *) + try + (* create a new span to log any espacing errors *) + Trace.with_ ~scope "get_record" @@ fun scope -> + let dbobj = call t @@ O.get_record ~self in + Scope.set_decision scope Sampling.RECORD_AND_SAMPLE ; + Scope.add_delayed_log scope @@ fun () -> + RPC.log_rpc scope (O.string_of_ref self) (O.rpc_of_t dbobj) + with _ -> () + + let with_call t name self f = + Trace.with_ name @@ fun scope -> + try call t @@ f + with Api_errors.Server_error _ as e -> + let bt = Printexc.get_raw_backtrace () in + Backtrace.is_important e ; + Scope.add_attrs scope (fun () -> + [ + ( "exception.stacktrace" + , `String (e |> Backtrace.get |> Backtrace.to_string_hum) + ) + ] + ) ; + log t scope ~self ; + (* call backend tick callbacks, if any. These would sample metrics. *) + SpanProcessor.force_flush () ; + Printexc.raise_with_backtrace e bt + + let call_set t (f : (self:dbref -> value:'a -> unit) api) ~self ~value : unit + = + with_call t "set" self @@ f ~self ~value + + let call_get t (f : (self:dbref -> 'a) api) ~self = + Trace.with_ "get" @@ fun _ -> call t @@ f ~self + + module Task = struct + include Task + + let of_call t name of_rpc obj f = + let log_obj t scope = log t scope ~self:obj in + Task.v name of_rpc log_obj @@ fun () -> with_call t "task" obj f + end + + let task = Task.of_call +end + +module VM = struct + include Object (struct + type t = API.vM_t + + let rpc_of_t = API.rpc_of_vM_t + + type dbref = API.ref_VM + + let dbref_of_rpc = Ref.t_of_rpc (fun _ -> `VM) + + let string_of_ref = Ref.string_of + + let get_record = VM.get_record + end) + + include VM + + module Async = struct + let clone t ~vm ~new_name = + task t __FUNCTION__ dbref_of_rpc vm @@ Async.VM.clone ~vm ~new_name + + let pool_migrate t ~vm ~host ~options = + task t __FUNCTION__ ignore vm @@ Async.VM.pool_migrate ~vm ~host ~options + + let hard_reboot t ~vm = + task t __FUNCTION__ ignore vm @@ Async.VM.hard_reboot ~vm + + let hard_shutdown t ~vm = + task t __FUNCTION__ ignore vm @@ Async.VM.hard_shutdown ~vm + + let start_on t ~vm ~host ~start_paused ~force = + task t __FUNCTION__ ignore vm + @@ Async.VM.start_on ~vm ~host ~start_paused ~force + end +end + +module TaskMap = Map.Make (struct + type t = API.ref_task + + let compare = Ref.compare +end) + +(*let test t ~host vm = + VM.task t "start_on" ignore vm + @@ Async.VM.start_on ~vm ~host ~start_paused:true ~force:false*) + +module P = Cli_progress_bar.Make (struct + type t = float + + let to_float = Fun.id +end) + +let on_progress_cli name ~total = + let p = P.create 80 0. 1. in + Printf.eprintf "\n%s\n%!" name ; + (* ensure we're at beginning of line *) + fun completed progress -> + if P.update p progress then Printf.eprintf "\r%s%!" P.(string_of_bar p) ; + if completed = total then + Printf.eprintf "\r%s\r%s%!" (String.make p.width ' ') P.(summarise p) + +let run_with_progress t name _scope ?(on_progress = on_progress_cli name) + ~callback ~total tasks = + (* Waiting for tasks to complete generates a lot of read queries, + that we don't want to clutter the output, hence use a new trace id here. + Launching and completeing tasks will still use their own scopes. + *) + Trace.with_ ~force_new_trace_id:true (name ^ "/run_with_progress") + @@ fun scope -> + (* total can be > List.length tasks in case [callback] inserts more tasks *) + let count = float_of_int total in + let tbl = Hashtbl.create 7 in + let overall = ref 0. in + let on_progress = on_progress ~total in + let on_task_progress task completed progress = + let old = Hashtbl.find_opt tbl task |> Option.value ~default:0. in + (* update running sum by removing old and adding new progress of task *) + overall := !overall -. old +. progress ; + Hashtbl.replace tbl task progress ; + (* progress is between [0, 1], so divide by [count] *) + let progress = !overall /. count in + let () = + Scope.add_metrics scope @@ fun () -> + Opentelemetry.Metrics.(gauge ~name:"task.progress" [float progress]) + in + on_progress completed progress + in + call t + @@ Tasks.wait_for_all_with_progress ~tasks:(List.map Task.task tasks) + ~callback ~on_progress:on_task_progress ; + on_progress total 1. + +let iter_lazy f tasks = + tasks + |> List.iter @@ fun task -> + if Lazy.is_val task then + let task = Lazy.force task in + f task + +let cancel_pending t scope tasks = + Trace.with_ ~scope "Cancel pending" @@ fun _ -> + tasks + |> iter_lazy @@ fun task -> + if not (Task.has_result task) then Task.try_cancel t task + +let batched_run_or_cancel t name ?(batch_size = 32) ?on_progress apifns = + (* default batch_size= 32 = 2*Dom0 vCPUs *) + let total = List.length apifns in + Trace.with_ (name ^ "/batched") + ~attrs:[("total", `Int total); ("batch_size", `Int batch_size)] + @@ fun scope -> + let taskref_to_task = Hashtbl.create total in + + let launch_task i f = + Trace.with_ ~scope "launch task" ~attrs:[("i", `Int i)] @@ fun _scope -> + let task = f t in + Hashtbl.replace taskref_to_task (Task.task task) task ; + task + in + + let all_tasks = apifns |> List.mapi (fun i f -> lazy (launch_task i f)) in + + let finally () = + cancel_pending t scope all_tasks + (* FIXME: this seems to always raise an exception, did XAPI already destroy + these? + Trace.with_ ~scope "destroy tasks" @@ fun _ -> + all_tasks |> iter_lazy Task.(destroy t)*) + in + + Fun.protect ~finally @@ fun () -> + (* launches a new task, every time an element from the sequence is retrieved *) + let next = + all_tasks |> List.to_seq |> Seq.map Lazy.force |> Seq.to_dispenser + in + + let callback completed task = + let () = + match Hashtbl.find_opt taskref_to_task task with + | None -> + (* unknown task, shouldn't happen *) + () + | Some task -> + let outcome = Task.result t task in + Scope.add_event scope (fun () -> + let launched = Hashtbl.length taskref_to_task in + Opentelemetry.Event.make + ~attrs: + [ + ("completed", `Int completed) + ; ("success", `Bool (Result.is_ok outcome)) + ; ("live", `Int (launched - completed)) + ; ("remaining", `Int (total - launched)) + ] + "subtask.completed" + ) ; + + if Result.is_error outcome then cancel_pending t scope all_tasks + in + + (* we've completed one, replace it with a new one, if any, + that way we always have at most batch_size tasks running + *) + match next () with + | None -> + [] + | Some task -> + [task.task] + in + + (* Start batch_size tasks *) + let tasks = + Trace.with_ ~scope "launch initial" ~attrs:[("batch_size", `Int batch_size)] + @@ fun _ -> next |> Seq.of_dispenser |> Seq.take batch_size |> List.of_seq + in + + run_with_progress t name scope ?on_progress ~callback ~total tasks ; + + Trace.with_ ~scope "retrieve task results" @@ fun _ -> + let results = + all_tasks + |> List.map (fun task -> + if Lazy.is_val task then + Task.result t (Lazy.force task) + else + Error (Failure "Task group cancelled") + ) + in + if List.exists Result.is_error results then + Scope.set_status scope + @@ Span_status.make ~code:Status_code_error ~message:"task(s) failed" ; + results + +let rec wait_no_active_tasks ?on_progress t ~host = + let tasks = + call t @@ Client.Client.Task.get_all_records + |> List.filter_map @@ fun (taskref, task) -> + if + task.API.task_resident_on = host + && (task.API.task_status = `pending + || task.API.task_status = `cancelling + ) + then + Some + ( Task.v task.API.task_name_label ignore (fun _ _ -> ()) @@ fun () -> + taskref + ) + else + None + in + match tasks with + | [] -> + () + | _ -> + (* we can't efficiently wait for `canceling to become `canceled. + Avoid busy loop + *) + Thread.delay 0.1 ; + let () = + Trace.with_ __FUNCTION__ @@ fun scope -> + run_with_progress t "wait_no_active_tasks" scope ?on_progress + ~total:(List.length tasks) + ~callback:(fun _ _ -> []) + tasks + in + (* more tasks could've been created meanwhile *) + (wait_no_active_tasks [@tailcall]) ?on_progress t ~host diff --git a/ocaml/quicktest/trace/api/api.mli b/ocaml/quicktest/trace/api/api.mli new file mode 100644 index 00000000000..a674dec7755 --- /dev/null +++ b/ocaml/quicktest/trace/api/api.mli @@ -0,0 +1,79 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +open Types +open Client.Client + +module Task : TASK + +module Object : functor (O : OBJECT) -> + OBJECT_OUT + with type dbref = O.dbref + and type t = O.t + and type 'a Task.t = 'a Task.t + +module VM : sig + include + OBJECT_OUT + with type dbref = API.ref_VM + and type t = API.vM_t + and type 'a Task.t = 'a Task.t + + include module type of VM + + module Async : sig + val clone : client -> vm:dbref -> new_name:string -> dbref Task.t + + val pool_migrate : + Client.Client.client + -> vm:dbref + -> host:[< `host] Ref.t + -> options:(string * string) list + -> unit Task.t + + val hard_reboot : Client.Client.client -> vm:dbref -> unit Task.t + + val hard_shutdown : Client.Client.client -> vm:dbref -> unit Task.t + + val start_on : + Client.Client.client + -> vm:dbref + -> host:[< `host] Ref.t + -> start_paused:bool + -> force:bool + -> unit Task.t + end +end + +val batched_run_or_cancel : + client + -> string + -> ?batch_size:int + -> ?on_progress:(total:int -> int -> float -> unit) + -> (client -> 'a Task.t) list + -> ('a, exn) result list +(** [batched_run_or_cancel name ?batch_size t ?on_progress tasks] runs all [tasks], and waits for them to complete. + If any task fails, all other pending tasks are canceled if possible, and + then waits for all tasks to complete. + [on_progress] is called with the overall progress of tasks, by default this + shows a CLI progress bar. + + @returns the result of all tasks +*) + +val wait_no_active_tasks : + ?on_progress:(total:int -> int -> float -> unit) + -> client + -> host:API.ref_host + -> unit +(** [wait_no_active_tasks t ~host] waits until [host] has 0 active tasks. *) diff --git a/ocaml/quicktest/trace/api/dune b/ocaml/quicktest/trace/api/dune new file mode 100644 index 00000000000..cee750ccfb6 --- /dev/null +++ b/ocaml/quicktest/trace/api/dune @@ -0,0 +1,16 @@ +(library + (name quicktest_trace_api) + (modules_without_implementation types) + (libraries + cli_progress_bar + quicktest_trace + quicktest_trace_rpc + rpclib.xml + threads.posix + sexplib + sexplib0 + backtrace + xapi-client + xapi-consts + xapi-types + )) diff --git a/ocaml/quicktest/trace/api/types.mli b/ocaml/quicktest/trace/api/types.mli new file mode 100644 index 00000000000..baafa450f0f --- /dev/null +++ b/ocaml/quicktest/trace/api/types.mli @@ -0,0 +1,93 @@ +open Quicktest_trace +open Client.Client + +(** A XAPI object *) +module type OBJECT = sig + (** XAPI DB record type *) + type t + + (** 'a {!type:Ref.t} *) + type dbref + + val rpc_of_t : t -> Rpc.t + (** [rpc_of_t t] converts a DB record to an Rpc type *) + + val string_of_ref : dbref -> string + (** [string_of_ref dbref] converts a db reference to a string *) + + val dbref_of_rpc : Rpc.t -> dbref + (** [dbref_of_rpc rpc] unmarshals a dbref from [rpc]. *) + + val get_record : (self:dbref -> t) Client.Client.api + (** [get_record ~rpc ~session ~self] is the API call to retrieve the DB + record, given a db reference [self] *) +end + +module type TASK = sig + type 'a t + + val v : + string + -> (Rpc.t -> 'a) + -> (client -> Scope.t -> unit) + -> (unit -> API.ref_task) + -> 'a t + (** [v client name of_rpc log_obj f] is the Async API call [f] + that returns a Task. + The Task result is converted using [of_rpc] on success. + On failure [log_obj] is used. + *) + + val task : _ t -> API.ref_task + (** [task t] is the DB reference for the task *) + + val result : Client.Client.client -> 'a t -> ('a, exn) result + (** [result t task] retrieves the task result, raises exception on + failure, and calls [log ~self:task.dbref]. + Can be called multiple times, it caches the result. + Also marks the tracing span as completed. + *) + + val on_progress : _ t -> float -> unit + (** [on_progress task progress] emits a [task.progress] metric, where [progress] should be between 0.0 and 1.0. *) +end + +module type OBJECT_OUT = sig + include OBJECT + + val log : Client.Client.client -> Scope.t -> self:dbref -> unit + (** [log client scope ~self] retrieves the DB record for [self] and logs it. *) + + val with_call : Client.Client.client -> string -> dbref -> 'a api -> 'a + (** [with_call client name dbref f] is the API call [f], and calls [log ~self] on failure. *) + + val call_set : + Client.Client.client + -> (self:dbref -> value:'a -> unit) api + -> self:dbref + -> value:'a + -> unit + (** [call_set t f ~self ~value] is the API call [f ~self ~value], calls [log ~self] on failure *) + + val call_get : + Client.Client.client -> (self:dbref -> 'a) api -> self:dbref -> 'a + (** [call_get t f ~self] is the API call [f ~self] *) + + module Task : sig + include TASK + + val of_call : + client -> string -> (Rpc.t -> 'a) -> dbref -> API.ref_task api -> 'a t + (** [of_call client name of_rpc dbref f] is the Async API call [f] on + the object [dbref] that creates a task [name], and the result is mapped using [of_rpc]. *) + end + + val task : + Client.Client.client + -> string + -> (Rpc.t -> 'a) + -> dbref + -> API.ref_task api + -> 'a Task.t + (** [task] is {!val:Task.of_call} *) +end diff --git a/ocaml/quicktest/trace/bounded.ml b/ocaml/quicktest/trace/bounded.ml new file mode 100644 index 00000000000..f365a5b17dc --- /dev/null +++ b/ocaml/quicktest/trace/bounded.ml @@ -0,0 +1,32 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type 'a t = {q: 'a Queue.t; capacity: int; mutable dropped: int} + +let make capacity = {q= Queue.create (); capacity; dropped= 0} + +let capacity t = t.capacity + +let dropped t = t.dropped + +let add (type a) t (e : a) = + while Queue.length t.q >= t.capacity do + let (_ : a) = Queue.pop t.q in + t.dropped <- t.dropped + 1 + done ; + Queue.add e t.q + +let to_seq t = Queue.to_seq t.q + +let clear t = Queue.clear t.q diff --git a/ocaml/quicktest/trace/bounded.mli b/ocaml/quicktest/trace/bounded.mli new file mode 100644 index 00000000000..5191e90a8a4 --- /dev/null +++ b/ocaml/quicktest/trace/bounded.mli @@ -0,0 +1,38 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** bounded container for elements of type ['a] *) +type 'a t + +val make : int -> 'a t +(** [make capacity] creates a bounded container that can hold at most [capacity] items. + When [capacity] is exceeded the oldest item will be dropped. +*) + +val capacity : _ t -> int +(** [capacity t] is the maximum number of items that [t] can store *) + +val dropped : _ t -> int +(** [dropped t] is the number of items dropped because [capacity] was exceeded. *) + +val add : 'a t -> 'a -> unit +(** [add t x] adds [x] to [t], dropping the oldest item to make room if + [capacity] is exceeded. *) + +val to_seq : 'a t -> 'a Seq.t +(** [to_seq t] iterates on the container. The container must not be modified + during iteration. *) + +val clear : _ t -> unit +(** [clear ()] removes all items, but doesn't alter [dropped]. *) diff --git a/ocaml/quicktest/trace/consoleBackend.ml b/ocaml/quicktest/trace/consoleBackend.ml new file mode 100644 index 00000000000..be7a15536c5 --- /dev/null +++ b/ocaml/quicktest/trace/consoleBackend.ml @@ -0,0 +1,318 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +let string_of_severity = + let open Opentelemetry_proto.Logs in + function + | Severity_number_unspecified + | Severity_number_trace + | Severity_number_trace2 + | Severity_number_trace3 + | Severity_number_trace4 -> + (Logs.Debug, Some "TRACE") + | Severity_number_debug + | Severity_number_debug2 + | Severity_number_debug3 + | Severity_number_debug4 -> + (Logs.Debug, None) + | Severity_number_info -> + ( Logs.App + , None + (* matches the reverse mapping in opentelemetry.logs for Logs.App *) + ) + | Severity_number_info2 | Severity_number_info3 | Severity_number_info4 -> + (Logs.Info, None) + | Severity_number_warn + | Severity_number_warn2 + | Severity_number_warn3 + | Severity_number_warn4 -> + (Logs.Warning, None) + | Severity_number_error + | Severity_number_error2 + | Severity_number_error3 + | Severity_number_error4 -> + (Logs.Error, None) + | Severity_number_fatal + | Severity_number_fatal2 + | Severity_number_fatal3 + | Severity_number_fatal4 -> + (Logs.Error, Some "FATAL") + +open Opentelemetry + +let ptime_of_nano ns = + let d = Int64.div ns Timestamp_ns.ns_in_a_day |> Int64.to_int + and ps = Int64.mul 1000L @@ Int64.rem ns Timestamp_ns.ns_in_a_day in + Ptime.unsafe_of_d_ps (d, ps) + +let pp_timestamp_ns = Fmt.using ptime_of_nano @@ Ptime.pp_rfc3339 ~frac_s:9 () + +let timestamp_style = `Faint + +(*let pp_line m ?header ?tags fmt = m ?header ?tags fmt*) + +open Opentelemetry_proto.Common + +(** See https://ocaml.org/manual/5.3/api/Format_tutorial.html#1_Refinementonhovboxes *) +let structural_braces pp ppf = Fmt.pf ppf "@[<1>{%a@;<0 -1>}@]" pp + +let rec pp_any_value ppf = function + | String_value s -> + (* format embedded newlines as Fmt.cut, useful for stacktraces *) + Fmt.(text |> hbox) ppf (String.trim s) + | Bool_value b -> + Fmt.bool ppf b + | Int_value i -> + Fmt.int64 ppf i + | Double_value d -> + Fmt.float ppf d + | Array_value {values} -> + Fmt.(Dump.list pp_any_value ppf values) + | Kvlist_value {values} -> + (structural_braces pp_key_value_list) ppf values + | Bytes_value b -> + Format.pp_print_bytes ppf b + +and pp_key_value ppf {key; value} = + match value with + | None -> + Fmt.string ppf key + | Some value -> + Fmt.field key Fun.id pp_any_value ppf value + +and pp_key_value_list ppf = Fmt.(list ~sep:comma pp_key_value) ppf + +let pp_body = Fmt.option pp_any_value + +let create_backend + ?(severity = Opentelemetry_proto.Logs.Severity_number_unspecified) + ?(formatter = Fmt.stderr) () = + (module struct + open Opentelemetry_proto.Logs + + let pp_line ~time_unix_nano ~span_id fmt = + (* vertical pretty printers would always insert a newline, + to avoid them we need to override the newline printing functions *) + let b = Buffer.create 80 in + let buf = Fmt.with_buffer ~like:formatter b in + Format.pp_set_margin buf 10000 ; + Format.( + pp_set_formatter_out_functions buf + { + (pp_get_formatter_out_functions buf ()) with + out_newline= + (fun () -> if Fmt.utf_8 buf then Buffer.add_string b "↵") + ; out_indent= (fun n -> if n >= 1 then Buffer.add_char b ' ') + } + ) ; + let k buf = + Fmt.flush buf () ; + Fmt.pf formatter "[%a|%a] %a@," + Fmt.(styled timestamp_style pp_timestamp_ns) + time_unix_nano + Fmt.(styled timestamp_style Span_id.pp) + ( if Bytes.length span_id > 0 then + span_id |> Span_id.of_bytes + else + Span_id.dummy + ) + Fmt.buffer b + in + Format.kfprintf k buf fmt + + let log_record (t : log_record) = + (* attributes are ignored *) + if t.severity_number >= severity then ( + let level, severity = string_of_severity t.severity_number in + + let severity = + if t.severity_text <> "" then + Some t.severity_text + else + severity + in + pp_line ~time_unix_nano:t.time_unix_nano ~span_id:t.span_id " %a@ %a" + Logs_fmt.pp_header (level, severity) pp_body t.body ; + if t.severity_number >= Severity_number_info then Fmt.flush formatter () + ) + + let scope_logs t = + List.iter log_record t.Opentelemetry_proto.Logs.log_records + + let resource_logs t = List.iter scope_logs t.scope_logs + + let send_logs = + Collector. + { + send= + (fun (msg : Opentelemetry_proto.Logs.resource_logs list) ~ret -> + msg |> List.iter resource_logs ; + ret () + ) + } + + open Opentelemetry_proto.Trace + + let status t = t.status + + let attributes t = t.attributes + + let events t = t.events |> List.rev + + let style_ok = `Green + + let style_error = `Red + + let ok = Fmt.(if_utf_8 (any "✓") (any "OK")) + + let error = Fmt.(if_utf_8 (any "✗") (any "ERROR")) + + let unset = Fmt.any "?" + + let pp_status_status_code ppf = function + | Status_code_ok -> + ok ppf () + | Status_code_error -> + error ppf () + | Status_code_unset -> + unset ppf () + + let pp_status ppf t = + let style = + match t.code with + | Status_code_ok -> + style_ok + | Status_code_error -> + style_error + | Status_code_unset -> + `None + in + + Fmt.pf ppf "%a %a" + (Fmt.styled style pp_status_status_code) + t.code Fmt.string t.message + + let pp_span_event ppf (t : Event.t) = + Fmt.pf ppf " @[%s@ %a@]" t.name pp_key_value_list t.attributes + + let style_span = `Bold + + let is_error = function + | Some {code= Status_code_error; _} -> + true + | Some _ | None -> + false + + let span t = + if is_error t.status || severity <= Severity_number_debug then ( + let duration = + (Int64.sub t.end_time_unix_nano t.start_time_unix_nano + |> Int64.to_float + ) + *. 1e-9 + in + pp_line ~time_unix_nano:t.start_time_unix_nano ~span_id:t.span_id + "@[<1>%a@ %a@ %a@]" + Fmt.(styled style_span string) + t.name + Fmt.(option pp_status) + (status t) pp_key_value_list (attributes t) ; + let () = + events t + |> List.iter @@ fun ev -> + pp_line ~time_unix_nano:ev.time_unix_nano ~span_id:t.span_id "%a" + pp_span_event ev ; + pp_line ~time_unix_nano:t.end_time_unix_nano ~span_id:t.span_id + "[duration: %+10.6fs]" duration + in + if is_error t.status then Fmt.flush formatter () + ) + + let scope_spans t = t.spans |> List.iter span + + let resource_spans t = t.scope_spans |> List.iter scope_spans + + let send_trace = + Collector. + { + send= + (fun msg ~ret -> + msg |> List.iter resource_spans ; + ret () + ) + } + + open Opentelemetry_proto.Metrics + + let metric = ignore + + let scope_metrics t = t.metrics |> List.iter metric + + let resource_metrics t = t.scope_metrics |> List.iter scope_metrics + + let send_metrics = + Collector. + { + send= + (fun msg ~ret -> + msg |> List.iter resource_metrics ; + ret () + ) + } + + let signal_emit_gc_metrics () = () + + let on_tick = Atomic.make (AList.make ()) + + let execute f = f () + + let flush () = Fmt.flush formatter () + + let tick () = + Atomic.get on_tick |> AList.get |> List.iter execute ; + flush () + + let cleanup () = flush () + + let set_on_tick_callbacks = Atomic.set on_tick end + : Opentelemetry.Collector.BACKEND +) + +let () = + let m = Mutex.create () in + let lock () = Mutex.lock m and unlock () = Mutex.unlock m in + Opentelemetry.Lock.set_mutex ~lock ~unlock + +let cols () = + try + let ch = Unix.open_process_args_in "tput" [|"tput"; "cols"|] in + let finally () = + let (_ : Unix.process_status) = Unix.close_process_in ch in + () + in + Option.map int_of_string + @@ Fun.protect ~finally + @@ fun () -> In_channel.input_line ch + with _ -> None + +let () = + (* set pretty printer margin according to actual terminal width, if known *) + cols () + |> Option.iter @@ fun margin -> + Format.pp_set_margin Fmt.stdout margin ; + Format.pp_set_margin Fmt.stderr margin + +let with_setup ?severity ?formatter ?enable () f = + let backend = create_backend ?severity ?formatter () in + Collector.with_setup_debug_backend ?enable backend () f diff --git a/ocaml/quicktest/trace/consoleBackend.mli b/ocaml/quicktest/trace/consoleBackend.mli new file mode 100644 index 00000000000..6a2482c2f85 --- /dev/null +++ b/ocaml/quicktest/trace/consoleBackend.mli @@ -0,0 +1,31 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val create_backend : + ?severity:Opentelemetry_proto.Logs.severity_number + -> ?formatter:Format.formatter + -> unit + -> (module Opentelemetry.Collector.BACKEND) +(** [create_backend ?severity ?formatter ()] is an Opentelemetry backend that prints + trace spans and logs that have at least [severity] on [formatter], default + {!val:Fmt.stderr}. Error spans are always printed, + and successful trace spans are considered {!val:Opentelemetry_proto.Logs.Severity_number_debug} *) + +val with_setup : + ?severity:Opentelemetry_proto.Logs.severity_number + -> ?formatter:Format.formatter + -> ?enable:bool + -> unit + -> (unit -> 'a) + -> 'a diff --git a/ocaml/quicktest/trace/diskBackend.ml b/ocaml/quicktest/trace/diskBackend.ml new file mode 100644 index 00000000000..e11b4a93100 --- /dev/null +++ b/ocaml/quicktest/trace/diskBackend.ml @@ -0,0 +1,166 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module type BACKEND = sig + include Opentelemetry.Collector.BACKEND + + val register_metrics : unit -> unit +end + +let create_backend' ~filename () : (module BACKEND) = + (module struct + type 'a t = { + file: Out_channel.t + ; ch: 'a Event.channel + ; send_ns: int Atomic.t + } + + let measure sum f = + let t0 = Mtime_clock.counter () in + let r = f () in + let dt = + Mtime_clock.count t0 |> Mtime.Span.to_uint64_ns |> Int64.to_int + in + let (_ : int) = Atomic.fetch_and_add sum dt in + r + + let flush_ns = Atomic.make 0 + + let write_ns = Atomic.make 0 + + let tick_ns = Atomic.make 0 + + let handle exporter encoder {file; ch; _} = + let t = Pbrt.Encoder.create () in + let rec loop () = + match Event.(ch |> receive |> sync) with + | None -> + () + | Some [] -> + measure flush_ns @@ fun () -> flush file ; (loop [@tailcall]) () + | Some msg -> + let () = + measure write_ns @@ fun () -> + encoder (exporter msg) t ; + Pbrt.Encoder.write_chunks (Out_channel.output file) t ; + Pbrt.Encoder.clear t + in + (loop [@tailcall]) () + in + let finally () = Out_channel.close_noerr file in + Fun.protect ~finally loop + + let metrics_cb name t () = + let open Opentelemetry in + [ + Metrics.sum ~name:(name ^ " overhead") + ~aggregation_temporality:Metrics.Aggregation_temporality_cumulative + ~unit_:"ns" + [Metrics.int (Atomic.get t)] + ] + + let make exporter encoder filename = + let file = Out_channel.open_bin (filename ^ ".otel") in + let t = {file; ch= Event.new_channel (); send_ns= Atomic.make 0} in + let thread = Thread.create (handle exporter encoder) t in + (thread, t) + + let trace = + let open Opentelemetry_proto.Trace_service in + make + (fun resource_spans -> + make_export_trace_service_request ~resource_spans () + ) + encode_pb_export_trace_service_request (filename ^ ".trace") + + let metrics = + let open Opentelemetry_proto.Metrics_service in + make + (fun resource_metrics -> + make_export_metrics_service_request ~resource_metrics () + ) + encode_pb_export_metrics_service_request (filename ^ ".metrics") + + let logs = + let open Opentelemetry_proto.Logs_service in + make + (fun resource_logs -> make_export_logs_service_request ~resource_logs ()) + encode_pb_export_logs_service_request (filename ^ ".logs") + + let registered = Atomic.make false + + let register_metrics () = + if not (Atomic.exchange registered true) then begin + Opentelemetry.Metrics_callbacks.register + (metrics_cb "trace" (snd trace).send_ns) ; + Opentelemetry.Metrics_callbacks.register + (metrics_cb "logs" (snd logs).send_ns) ; + Opentelemetry.Metrics_callbacks.register + (metrics_cb "metrics" (snd metrics).send_ns) ; + Opentelemetry.Metrics_callbacks.register + (metrics_cb "file buffer write" write_ns) ; + Opentelemetry.Metrics_callbacks.register + (metrics_cb "file flush" write_ns) ; + Opentelemetry.Metrics_callbacks.register (metrics_cb "tick" tick_ns) + end + + let send (_, t) msg = + measure t.send_ns @@ fun () -> Event.(send t.ch msg |> sync) + + open Opentelemetry.Collector + + let send_trace = {send= (fun msg ~ret -> send trace (Some msg) ; ret ())} + + let send_metrics = + {send= (fun msg ~ret -> send metrics (Some msg) ; ret ())} + + let send_logs = {send= (fun msg ~ret -> send logs (Some msg) ; ret ())} + + let signal_emit_gc_metrics = ignore + + let on_tick = Atomic.make (Opentelemetry.AList.make ()) + + let invoke f = f () + + let tick () = + register_metrics () ; + measure tick_ns @@ fun () -> + on_tick |> Atomic.get |> Opentelemetry.AList.get |> List.iter invoke ; + send trace (Some []) ; + send logs (Some []) ; + send metrics (Some []) + + let set_on_tick_callbacks = Atomic.set on_tick + + let cleanup () = + send trace None ; + send logs None ; + send metrics None ; + Thread.join (fst trace) ; + Thread.join (fst metrics) ; + Thread.join (fst logs) + end +) + +let create_backend ~filename () : (module Opentelemetry.Collector.BACKEND) = + let (module B) = create_backend' ~filename () in + (module B) + +let with_setup ~filename ?enable () f = + let (module B) = create_backend' ~filename () in + Opentelemetry.Collector.with_setup_debug_backend ?enable (module B) () + @@ fun () -> + (* can only be called when backend is registered, otherwise the on_tick + callback gets registered to the wrong place *) + B.register_metrics () ; f () diff --git a/ocaml/quicktest/trace/diskBackend.mli b/ocaml/quicktest/trace/diskBackend.mli new file mode 100644 index 00000000000..cd19826829d --- /dev/null +++ b/ocaml/quicktest/trace/diskBackend.mli @@ -0,0 +1,24 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val create_backend : + filename:string -> unit -> (module Opentelemetry.Collector.BACKEND) +(** [create_backend ~filename ()] creates a backend that stores output in + [filename.logs.otel], [filename.trace.otel], [filename.metrics.otel]. *) + +val with_setup : filename:string -> ?enable:bool -> unit -> (unit -> 'a) -> 'a +(**[with_setup ~filename ?enable () f] calls {!val:create_backend ~filename], + and then sets up the created backend as the current backend for the duration + of the call to [f ()]. +*) diff --git a/ocaml/quicktest/trace/dune b/ocaml/quicktest/trace/dune new file mode 100644 index 00000000000..6086ae7cea7 --- /dev/null +++ b/ocaml/quicktest/trace/dune @@ -0,0 +1,18 @@ +(library + (name quicktest_trace) + (modules_without_implementation sampling) + (libraries + ambient-context + fmt + logs + logs.fmt + mtime + mtime.clock.os + (re_export opentelemetry) + (re_export opentelemetry.proto) + pbrt + ptime + threads.posix + unix + backtrace + )) diff --git a/ocaml/quicktest/trace/rpclib/conventions.ml b/ocaml/quicktest/trace/rpclib/conventions.ml new file mode 100644 index 00000000000..2fd9acf3a60 --- /dev/null +++ b/ocaml/quicktest/trace/rpclib/conventions.ml @@ -0,0 +1,17 @@ +let rpc_system_name = "rpc.system.name" + +let rpc_method = "rpc.method" + +let error_type = "error.type" + +let other = "_OTHER" + +let rpc_message = "rpc.message" + +let rpc_message_id = "rpc.message.id" + +let rpc_message_type = "rpc.message.type" + +let sent = "SENT" + +let received = "RECEIVED" diff --git a/ocaml/quicktest/trace/rpclib/conventions.mli b/ocaml/quicktest/trace/rpclib/conventions.mli new file mode 100644 index 00000000000..3292161e977 --- /dev/null +++ b/ocaml/quicktest/trace/rpclib/conventions.mli @@ -0,0 +1,19 @@ +(** @see *) + +val rpc_system_name : string + +val rpc_method : string + +val error_type : string + +val other : string + +val rpc_message : string + +val rpc_message_id : string + +val rpc_message_type : string + +val sent : string + +val received : string diff --git a/ocaml/quicktest/trace/rpclib/dune b/ocaml/quicktest/trace/rpclib/dune new file mode 100644 index 00000000000..1ca667c1edd --- /dev/null +++ b/ocaml/quicktest/trace/rpclib/dune @@ -0,0 +1,6 @@ +(library + (name quicktest_trace_rpc) + (libraries + quicktest_trace + rpclib.core + )) diff --git a/ocaml/quicktest/trace/rpclib/rPC.ml b/ocaml/quicktest/trace/rpclib/rPC.ml new file mode 100644 index 00000000000..86bb8fbe36c --- /dev/null +++ b/ocaml/quicktest/trace/rpclib/rPC.ml @@ -0,0 +1,133 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +open Quicktest_trace +open Conventions + +let id_sent = Atomic.make 1 + +let id_received = Atomic.make 1 + +let rpc_event ~id kind = + let id = Atomic.fetch_and_add id 1 in + Opentelemetry.Event.make + ~attrs:[(rpc_message_id, `Int id); (rpc_message_type, `String kind)] + rpc_message + +let code_of_response response = + match response.Rpc.contents with + | Rpc.Enum (Rpc.String code :: _) -> + code + | _ -> + other + +let attrs_of_error response = + [(error_type, `String (code_of_response response))] + +let rec any_value_of_rpc = + let open Opentelemetry_proto.Common in + function + | Rpc.Null -> + None + | Rpc.Bool b -> + Some (Bool_value b) + | Rpc.Int32 i32 -> + Some (Int_value (Int64.of_int32 i32)) + | Rpc.Int i -> + Some (Int_value i) + | Rpc.Float f -> + Some (Double_value f) + | Rpc.Base64 s -> + (* don't log the full Base64 entry, the UEFI NVRAM is huge *) + Some (String_value (Printf.sprintf "base64(len=%d)" (String.length s))) + | Rpc.DateTime s | Rpc.String s -> + Some (String_value s) + | Rpc.Enum lst -> + let values = lst |> List.filter_map any_value_of_rpc in + Some (Array_value (make_array_value ~values ())) + | Rpc.Dict dict -> + let values = + dict + |> List.map (fun (key, v) -> + make_key_value ~key ~value:(any_value_of_rpc v) () + ) + in + Some (Kvlist_value (make_key_value_list ~values ())) + +let log_rpc ?(time_unix_nano = Opentelemetry.Timestamp_ns.now_unix_ns ()) scope + key rpc = + let trace_id = Scope.trace_id scope |> Opentelemetry.Trace_id.to_bytes + and span_id = Scope.span_id scope |> Opentelemetry.Span_id.to_bytes in + let open Opentelemetry_proto in + let body = + Some + Common.( + Kvlist_value + (make_key_value_list + ~values:[make_key_value ~key ~value:(any_value_of_rpc rpc) ()] + () + ) + ) + in + Logs.make_log_record ~time_unix_nano ~observed_time_unix_nano:time_unix_nano + ~severity_number:Severity_number_trace ~severity_text:"TRACE" ~body + ~attributes:[] ~dropped_attributes_count:0l ~flags:0l ~trace_id ~span_id () + +let wrap ?(log_body = false) rpc call = + let attrs = + [(rpc_system_name, `String "xmlrpc"); (rpc_method, `String call.Rpc.name)] + in + let () = + if log_body then + match Scope.get_ambient_scope () with + | None -> + () + | Some scope -> + (* log the actual bodies of the RPC, + this is only for testing purposes, since they may contain secrets *) + Scope.add_log scope (fun () -> + log_rpc scope call.Rpc.name (Rpc.Enum call.Rpc.params) + ) + in + Trace.with_ ~kind:Opentelemetry.Span.Span_kind_client ~attrs call.Rpc.name + @@ fun scope -> + Scope.add_event scope (fun () -> rpc_event ~id:id_sent sent) ; + + let (response : Rpc.response) = rpc call in + + Scope.add_event scope (fun () -> rpc_event ~id:id_received received) ; + if log_body then + Scope.add_delayed_log scope (fun () -> + log_rpc scope "response" response.contents + ) ; + + if not response.Rpc.success then begin + Scope.set_status scope + @@ Span_status.make ~code:Status_code_error + ~message:(code_of_response response) ; + Scope.add_attrs scope (fun () -> attrs_of_error response) + end ; + response + +let http_headers () = + match Scope.get_ambient_scope () with + | None -> + [] + | Some scope -> + let open Opentelemetry.Trace_context.Traceparent in + let traceparent = + to_value ~trace_id:(Scope.trace_id scope) + ~parent_id:(Scope.span_id scope) () + in + Scope.add_attrs scope (fun () -> [(name, `String traceparent)]) ; + [(name, traceparent)] diff --git a/ocaml/quicktest/trace/rpclib/rPC.mli b/ocaml/quicktest/trace/rpclib/rPC.mli new file mode 100644 index 00000000000..aa539f0d86b --- /dev/null +++ b/ocaml/quicktest/trace/rpclib/rPC.mli @@ -0,0 +1,41 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val log_rpc : + ?time_unix_nano:Opentelemetry.Timestamp_ns.t + -> Quicktest_trace.Scope.t + -> string + -> Rpc.t + -> Opentelemetry.Logs.t +(** [log_rpc ?time_unix_nano scope key rpc] logs the [rpc] at timestamp + [time_unix_nano] with the specific [key]. + + This is useful for structured logging (the full nested dictionary is + retained in Opentelemetry format, it isn't converted to a string). +*) + +val wrap : + ?log_body:bool -> (Rpc.call -> Rpc.response) -> Rpc.call -> Rpc.response +(** [wrap ?log_body rpc] wraps an [rpc] call: creates an Opentelemetry span + following the semantic {!module:Conventions} for RPCs. + + [log_body] can be set to [true] to also logs the body of the request, and on error the body of the reply. + This is suitable for testing, but not production use (it may log sensitive information) +*) + +val http_headers : unit -> (string * string) list +(** [http_headers ()] returns the additional HTTP headers to insert for W3C + TraceContext propagation. + @see +*) diff --git a/ocaml/quicktest/trace/sampler.ml b/ocaml/quicktest/trace/sampler.ml new file mode 100644 index 00000000000..15f31a6b667 --- /dev/null +++ b/ocaml/quicktest/trace/sampler.ml @@ -0,0 +1,45 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +let should_sample ?parent_scope ?trace_id:_ ?kind:_ ?(attrs = []) ?links:_ + ?trace_state name = + let parent_sampled = + match parent_scope with + | None -> + (* run_with_progress is too verbose, but we want to log everything else + for debugging *) + name <> "run_with_progress" + | Some scope -> + Scope.is_sampled scope + in + let decision = + if Opentelemetry.Collector.has_backend () then + (* for tail-based sampling to work, this must not be DROP *) + if parent_sampled then + Sampling.RECORD_AND_SAMPLE + else + RECORD_ONLY + else + (* without a backend these wouldn't go anywhere, so don't record them in the first place *) + DROP + in + let attrs = + if decision = DROP then + [] + else + attrs + in + Sampling.{decision; attrs; trace_state} + +let get_description () = "ParentBasedOrRecordOnly" diff --git a/ocaml/quicktest/trace/sampler.mli b/ocaml/quicktest/trace/sampler.mli new file mode 100644 index 00000000000..5651984d99c --- /dev/null +++ b/ocaml/quicktest/trace/sampler.mli @@ -0,0 +1,34 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** @see *) + +val should_sample : + ?parent_scope:Scope.t + -> ?trace_id:Opentelemetry.Trace_id.t + -> ?kind:Opentelemetry.Span.kind + -> ?attrs:Opentelemetry.key_value list + -> ?links:Opentelemetry.Span_link.t list + -> ?trace_state:string + -> string + -> Sampling.result +(** [should_sample ?parent_scope ?trace_id ?kind ?attrs ?links ?trace_state name] + returns a (head) sampling decision for the given span. + It should pass through [trace_state] unchanged if it doesn't intend to + change it, otherwise it overrides the caller supplied one. + The sampling decision can be changed later. +*) + +val get_description : unit -> string +(** [get_description ()] is the name of the current sampler. *) diff --git a/ocaml/quicktest/trace/sampling.mli b/ocaml/quicktest/trace/sampling.mli new file mode 100644 index 00000000000..203b42e5578 --- /dev/null +++ b/ocaml/quicktest/trace/sampling.mli @@ -0,0 +1,34 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** @see *) + +open Opentelemetry + +type decision = + | DROP + (** all events, attributes, logs and metrics associated with the span + will be dropped, and the span itself won't be emitted to the backend *) + | RECORD_ONLY + (** all events, attributes, logs and metrics are recorded, but + will be dropped at the end, unless the sampling decision is changed *) + | RECORD_AND_SAMPLE + (** record all events, attributes, logs and metrics, and emit the span to the + backend*) + +type result = { + decision: decision (** the sampling decision for the Span *) + ; attrs: (string * value) list (** attributes to add to the Span *) + ; trace_state: string option (** override the trace state *) +} diff --git a/ocaml/quicktest/trace/scope.ml b/ocaml/quicktest/trace/scope.ml new file mode 100644 index 00000000000..7b965a5fd51 --- /dev/null +++ b/ocaml/quicktest/trace/scope.ml @@ -0,0 +1,146 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Opentelemetry + +let capacity = Atomic.make 100 + +let set_capacity = Atomic.set capacity + +type t = { + scope: Scope.t + ; attrs: key_value list Bounded.t + ; events: Event.t Bounded.t + ; metrics: Metrics.t Bounded.t + ; logs: Logs.t Bounded.t + ; delayed_logs: (unit -> Logs.t) Bounded.t + ; mutable status: Span_status.t option + ; mutable decision: Sampling.decision +} + +let make scope decision = + let capacity = Atomic.get capacity in + { + scope + ; attrs= Bounded.make capacity + ; events= Bounded.make capacity + ; metrics= Bounded.make capacity + ; logs= Bounded.make capacity + ; delayed_logs= Bounded.make capacity + ; decision + ; status= None + } + +let ambient_scope_key = Ambient_context.create_key () + +let[@inline always] get_ambient_scope ?scope () = + match scope with + | Some _ -> + scope + | None -> + Ambient_context.get ambient_scope_key + +let[@inline always] with_ambient_scope t f = + Ambient_context.with_binding ambient_scope_key t (fun _ -> f ()) + +let[@inline always] trace_id t = t.scope.Scope.trace_id + +let[@inline always] span_id t = t.scope.Scope.span_id + +let[@inline always] to_span_ctx t = Scope.to_span_ctx t.scope + +let[@inline always] is_recording t = + match t.decision with + | DROP -> + false + | RECORD_ONLY | RECORD_AND_SAMPLE -> + true + +let[@inline always] is_sampled t = + match t.decision with + | DROP | RECORD_ONLY -> + false + | RECORD_AND_SAMPLE -> + true + +let[@inline always] set_decision t decision = t.decision <- decision + +let[@inline always] scope t = t.scope + +let[@inline always] status t = t.status + +let[@inline always] set_status t status = t.status <- Some status + +let[@inline always] bounded_add_ignore_exn ~__FUNCTION__ queue t f = + if is_recording t then Bounded.add queue @@ f () + +let[@inline always] add_attrs t = bounded_add_ignore_exn ~__FUNCTION__ t.attrs t + +let[@inline always] add_event t = + bounded_add_ignore_exn ~__FUNCTION__ t.events t + +let[@inline always] add_metrics t = + bounded_add_ignore_exn ~__FUNCTION__ t.metrics t + +let[@inline always] add_log t f = + if is_recording t then + let log = f () in + if log.Opentelemetry_proto.Logs.severity_number >= Severity_number_info then + Logs.emit [log] + else + Bounded.add t.logs log + +let[@inline always] add_delayed_log t f = + if is_recording t then Bounded.add t.delayed_logs f + +let update_log t log = + let span_id = t |> span_id |> Span_id.to_bytes + and trace_id = t |> trace_id |> Trace_id.to_bytes in + Opentelemetry_proto.Logs.{log with span_id; trace_id} + +(* may need to update or add exemplars to link *) +let update_metric = Fun.id + +let bounded_iter_delayed b f = b |> Bounded.to_seq |> Seq.iter f + +let bounded_iter b f = bounded_iter_delayed b @@ fun item -> f (fun () -> item) + +let bounded_emit b update f = + b |> Bounded.to_seq |> Seq.map update |> List.of_seq |> f + +let finish_span t = + bounded_iter t.attrs (Scope.add_attrs t.scope) ; + bounded_iter t.events (Scope.add_event t.scope) ; + Bounded.clear t.attrs ; + Bounded.clear t.events + +let finish_logs_metrics t = + if is_sampled t then begin + bounded_emit t.metrics update_metric Metrics.emit ; + + (* we don't have access to report the dropped counts in the proto *) + + (* transfer delayed logs to regular logs, must be done before regular logs + are emitted *) + bounded_iter_delayed t.delayed_logs (add_log t) ; + Bounded.clear t.delayed_logs ; + + bounded_emit t.logs (update_log t) Logs.emit + end ; + + (* ensure we don't emit duplicates if this function gets called multiple + times *) + Bounded.clear t.metrics ; + Bounded.clear t.logs ; + Bounded.clear t.delayed_logs diff --git a/ocaml/quicktest/trace/scope.mli b/ocaml/quicktest/trace/scope.mli new file mode 100644 index 00000000000..8773ab890a9 --- /dev/null +++ b/ocaml/quicktest/trace/scope.mli @@ -0,0 +1,132 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** Wraps {!module:Opentelemetry.Scope}. + Adds support for bounded items (attributes, events, logs and metrics) + associated with a span, and sampling. +*) + +val set_capacity : int -> unit +(** [set_capacity capacity] sets the maximum number of + attributes/events/logs/metrics that a span scope can store. + Changes will take effect for newly created span scopes only. +*) + +(** Current span scope *) +type t + +val make : Opentelemetry.Scope.t -> Sampling.decision -> t +(** [make oscope decision] initializes a scope with the Opentelemetry [oscope] and a sampling decision. + + {ul + {- {!val:Sampling.DROP} won't record any attributes,events,metrics,logs and drops the span itself} + {- {!val:Sampling.RECORD_AND_SAMPLE} records all attributes,events,metrics,logs and emits the span,metrics and logs. + {- {!val:Sampling.RECORD_ONLY} records all attributes,events,metrics,logs but doesn't emit the span, metrics or logs. + This can be changed to {!val:Sampling.RECORD_AND_SAMPLE} by a Tail-based + sampling processor (e.g. to always sample all errors) using {!val:set_decision} + } +*) + +val trace_id : t -> Opentelemetry.Trace_id.t +(** [trace_id t] the {!val:Trace_id.t} of the current span scope *) + +val span_id : t -> Opentelemetry.Span_id.t +(** [span_id t] the {!val:Span_id.t} of the current span scope *) + +val to_span_ctx : t -> Opentelemetry.Span_ctx.t +(** [to_span_ctx t] the {!val:Span_ctx.t} of the current span scope *) + +val is_recording : t -> bool +(** [is_recording t] is true when the sampling decision is + {!val:Sampling.RECORD_ONLY} or {!val:Sampling.RECORD_AND_SAMPLE} *) + +val is_sampled : t -> bool +(** [is_sampled t] is true when the sampling decision is {!val:Sampling.RECORD_AND_SAMPLE} *) + +val set_decision : t -> Sampling.decision -> unit +(** [set_decision t] changes the sampling decision on the current span scope. *) + +val status : t -> Span_status.t option +(** [status t] is the current span status, if it has been set *) + +val set_status : t -> Span_status.t -> unit +(** [set_status t status] sets the span's status to [t]. + If a span's sampling decision is {!val:RECORD_ONLY} this + automatically upgrades it to {!val:RECORD_AND_SAMPLE} +*) + +val add_attrs : t -> (unit -> Opentelemetry.key_value list) -> unit +(** [add_attrs t f] calls [f ()] when [is_recording t] is true. + If this exceeds the span capacity then the oldest attributes are dropped. + + The call may be deferred to span completion since it relies only on + immutable values, and contains no timestamps. + (if an attribute can only be obtain via a side effect then it should be + a metric or an event instead). + However for consistency with the other [add_*] functions [f ()] is + currently called immediately too. +*) + +val add_event : t -> (unit -> Opentelemetry.Event.t) -> unit +(** [add_events t f] calls [f ()] when [is_recording t] is true. + If this exceeds the span capacity then the oldest events are dropped. + The call to [f ()] is done immediately, because events contain a timestamp + recorded at creation time, and this cannot be deferred. +*) + +val add_metrics : t -> (unit -> Opentelemetry.Metrics.t) -> unit +(** [add_metrics t f] calls [f ()] when [is_recording t] is true. + If this exceeds the span capacity then the oldest metrics are dropped. + The call to [f ()] is done immediately, because events contain a timestamp + recorded at creation time, and may have side effects to sample state, + and this cannot be deferred. +*) + +val add_log : t -> (unit -> Opentelemetry.Logs.t) -> unit +(** [add_log t f] may call [f ()] when [is_recording t] is true. + The call to [f ()] is done immediately, because formatting logs may have + side-effects, e.g. reading global state. + If the log is expensive to compute, consider using {!val:add_delayed_log} + instead. + Logs at level info and above are always emitted immediately. +*) + +val add_delayed_log : t -> (unit -> Opentelemetry.Logs.t) -> unit +(** [add_log t f] may call [f ()] when [is_sampled t] is true at span completion. *) + +(* for internal use in Trace *) + +val scope : t -> Opentelemetry.Scope.t +(** [scope t] is the underlying Opentelemetry Scope *) + +val finish_span : t -> unit +(** [finish_span t] when [is_sampled ()] it transfers all attributes/events to + {!val:Opentelemetry.Scope.t}. + + This function should only be called once, otherwise span capacity limits + cannot be guaranteed. +*) + +val finish_logs_metrics : t -> unit +(** [finish_logs_metrics t] when [is_sampled ()] it emits all metrics/logs/delayed logs. + + This function should only be called once, otherwise span capacity limits + cannot be guaranteed. +*) + +val ambient_scope_key : t Ambient_context.key + +val get_ambient_scope : ?scope:t -> unit -> t option + +val with_ambient_scope : t -> (unit -> 'a) -> 'a diff --git a/ocaml/quicktest/trace/spanProcessor.ml b/ocaml/quicktest/trace/spanProcessor.ml new file mode 100644 index 00000000000..208a968127a --- /dev/null +++ b/ocaml/quicktest/trace/spanProcessor.ml @@ -0,0 +1,39 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +let ( let@ ) t f = Option.iter f t + +let on_start _scope = () + +let is_error t = + match Scope.status t with + | Some {code= Status_code_error; _} -> + true + | _ -> + false + +let on_end t = + if not Scope.(is_sampled t) then begin + if is_error t then + (* tail-based sampling: upgrade RECORD_ONLY to RECORD_AND_SAMPLE on error *) + Scope.set_decision t RECORD_AND_SAMPLE + end + +let force_flush () = + let@ (module B) = Opentelemetry.Collector.get_backend () in + B.tick () + +let shutdown () = + (* backend cleanup is done elsewhere already, so only flush *) + force_flush () diff --git a/ocaml/quicktest/trace/spanProcessor.mli b/ocaml/quicktest/trace/spanProcessor.mli new file mode 100644 index 00000000000..e5ce9aad20b --- /dev/null +++ b/ocaml/quicktest/trace/spanProcessor.mli @@ -0,0 +1,39 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** @see . + + Only an approximation of the interface. + + Currently implements + that samples all spans containing an error only. +*) + +val on_start : Scope.t -> unit +(** [on_start scope] gets called synchronously when a span (scope) is created. + Can change [scope]. +*) + +val on_end : Scope.t -> unit +(** [on_end scope] gets called synchronously when a span (scope) finishes. + Should not change the [scope], except for calling [Scope.set_decision]. +*) + +val force_flush : unit -> unit +(** [force_flush ()] tells the backend to flush. + This is not directly supported, it currently calls [Backend.tick ()]. +*) + +val shutdown : unit -> unit +(** [shutdown ()] shuts down the backend *) diff --git a/ocaml/quicktest/trace/span_status.ml b/ocaml/quicktest/trace/span_status.ml new file mode 100644 index 00000000000..ee66a0d9eeb --- /dev/null +++ b/ocaml/quicktest/trace/span_status.ml @@ -0,0 +1,26 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(* TODO: use the one from newer opentelemetry *) + +open Opentelemetry_proto.Trace + +type t = status + +type code = status_status_code = + | Status_code_unset + | Status_code_ok + | Status_code_error + +let make ~message ~code = default_status ~message ~code () diff --git a/ocaml/quicktest/trace/span_status.mli b/ocaml/quicktest/trace/span_status.mli new file mode 100644 index 00000000000..bbe83cd7d6d --- /dev/null +++ b/ocaml/quicktest/trace/span_status.mli @@ -0,0 +1,25 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type t = Opentelemetry_proto.Trace.status + +type code = Opentelemetry_proto.Trace.status_status_code = + | Status_code_unset + | Status_code_ok + | Status_code_error + +val make : + message:string + -> code:Opentelemetry_proto.Trace.status_status_code + -> Opentelemetry_proto.Trace.status diff --git a/ocaml/quicktest/trace/teeBackend.ml b/ocaml/quicktest/trace/teeBackend.ml new file mode 100644 index 00000000000..9acfdcd72a8 --- /dev/null +++ b/ocaml/quicktest/trace/teeBackend.ml @@ -0,0 +1,97 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Opentelemetry + +module Backend (B1 : Collector.BACKEND) (B2 : Collector.BACKEND) : + Collector.BACKEND = struct + let send_trace = + Collector. + { + send= + (fun msg ~ret -> + B1.send_trace.send msg ~ret:Fun.id ; + B2.send_trace.send msg ~ret + ) + } + + let send_metrics = + Collector. + { + send= + (fun msg ~ret -> + B1.send_metrics.send msg ~ret:Fun.id ; + B2.send_metrics.send msg ~ret + ) + } + + let send_logs = + Collector. + { + send= + (fun msg ~ret -> + B1.send_logs.send msg ~ret:Fun.id ; + B2.send_logs.send msg ~ret + ) + } + + let signal_emit_gc_metrics () = + B1.signal_emit_gc_metrics () ; + B2.signal_emit_gc_metrics () + + let tick () = B1.tick () ; B2.tick () + + let set_on_tick_callbacks t = + B1.set_on_tick_callbacks t ; B2.set_on_tick_callbacks t + + let cleanup () = B1.cleanup () ; B2.cleanup () +end + +let with_setup (module B1 : Collector.BACKEND) (module B2 : Collector.BACKEND) + ?enable () f = + let module B = Backend (B1) (B2) in + Collector.with_setup_debug_backend ?enable (module B) () f + +let ticker interval = + while true do + let () = + try + match Collector.get_backend () with + | Some (module B : Collector.BACKEND) -> + B.tick () + | None -> + () + with _ -> () + in + (* this will drift, but during load that is what we want: + we don't want the tick thread to add extra load + *) + Unix.sleepf interval + done + +let setup_tick ?(interval = 60.) () = + let (_ : Thread.t) = Thread.create ticker interval in + () + +let with_default_setup ?(filename = "trace") ?interval ?enable () f = + Out_channel.with_open_text (filename ^ ".log") @@ fun trace_log -> + let formatter = Format.formatter_of_out_channel trace_log in + let module Text = (val ConsoleBackend.create_backend ~formatter ()) in + let module OTLP = (val DiskBackend.create_backend ~filename ()) in + let module Disk = Backend (OTLP) (Text) in + setup_tick ?interval () ; + with_setup + (ConsoleBackend.create_backend ~severity:Severity_number_info ()) + (module Disk) + ?enable () f diff --git a/ocaml/quicktest/trace/teeBackend.mli b/ocaml/quicktest/trace/teeBackend.mli new file mode 100644 index 00000000000..bdfef3a1358 --- /dev/null +++ b/ocaml/quicktest/trace/teeBackend.mli @@ -0,0 +1,49 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Opentelemetry.Collector + +(** Backend(B1)(B2) sends data to both B1 and B2. + If B1 fails then B2 is never called. +*) +module Backend : functor (_ : BACKEND) (_ : BACKEND) -> BACKEND + +val with_setup : + (module BACKEND) + -> (module BACKEND) + -> ?enable:bool + -> unit + -> (unit -> 'a) + -> 'a +(** [with_setup b1 b2 ?enable () f] creates a new {!module:Backend} that outputs +to both [b1] and [b2] backends, and sets it as the current backend for the +duration of the call to [f ()]*) + +val setup_tick : ?interval:float -> unit -> unit +(** [setup_tick ?interval ()] calls [B.tick ()] on the current Backend every + [interval] seconds. *) + +val with_default_setup : + ?filename:string + -> ?interval:float + -> ?enable:bool + -> unit + -> (unit -> 'a) + -> 'a +(** [with_default_setup ?filename ?interval ?enable () f] + sets up 3 backends: a console backend that logs only info and above, a text dump, and a binary (OTLP) dump. + Periodic flushing every [interval] is also enabled. + + The backends are only set during the call to [f ()] +*) diff --git a/ocaml/quicktest/trace/test/.gitignore b/ocaml/quicktest/trace/test/.gitignore new file mode 100644 index 00000000000..f3fab411070 --- /dev/null +++ b/ocaml/quicktest/trace/test/.gitignore @@ -0,0 +1 @@ +*.otel diff --git a/ocaml/quicktest/trace/test/dune b/ocaml/quicktest/trace/test/dune new file mode 100644 index 00000000000..5803eff4f3d --- /dev/null +++ b/ocaml/quicktest/trace/test/dune @@ -0,0 +1,3 @@ +(test + (name tracetest) + (libraries quicktest_trace quicktest_trace_rpc fmt.tty logs logs.fmt opentelemetry rpclib.core)) diff --git a/ocaml/quicktest/trace/test/tracetest.ml b/ocaml/quicktest/trace/test/tracetest.ml new file mode 100644 index 00000000000..c423d9328b5 --- /dev/null +++ b/ocaml/quicktest/trace/test/tracetest.ml @@ -0,0 +1,68 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Quicktest_trace +open Quicktest_trace_rpc + +let () = + Printexc.record_backtrace true ; + Fmt_tty.setup_std_outputs () ; + Opentelemetry.Globals.service_name := "tracetest" ; + Logs.set_level (Some Logs.Debug) ; + Logs.set_reporter Logs_fmt.(reporter ~pp_header ()) ; + TeeBackend.with_default_setup () @@ fun () -> + let (_ : _ result) = + Trace.with_result Fun.id "OK (not shown)" @@ fun _ -> Ok () + in + let (_ : _ result) = + Trace.with_result Fun.id "OK (sampled)" @@ fun scope -> + Scope.set_decision scope Sampling.RECORD_AND_SAMPLE ; + Ok () + in + let (_ : _ result) = + Trace.with_result Fun.id "Error (shown)" @@ fun _ -> Error "test" + in + let () = Trace.with_ "OK (not shown)" @@ fun _ -> () in + let () = + try + Trace.with_ "Exception (shown)" @@ fun scope -> + Scope.add_event scope (fun () -> Opentelemetry.Event.make "foo") ; + Scope.add_log scope (fun () -> Opentelemetry.Logs.make_str "log1") ; + Scope.add_log scope (fun () -> Opentelemetry.Logs.make_str "log2") ; + failwith "TEST" + with Failure _ -> () + in + let (_ : (_, _) result) = + Trace.with_result (fun _ -> "") "rpc" @@ fun scope -> + Scope.set_decision scope Sampling.RECORD_AND_SAMPLE ; + Error + (RPC.wrap ~log_body:true + (fun _ -> + let (_ : _ list) = RPC.http_headers () in + Rpc. + { + success= false + ; contents= Rpc.String "failed" + ; is_notification= false + } + ) + Rpc. + { + name= "test1" + ; params= [Rpc.Int 4L; Rpc.Int 5L] + ; is_notification= false + } + ) + in + () diff --git a/ocaml/quicktest/trace/trace.ml b/ocaml/quicktest/trace/trace.ml new file mode 100644 index 00000000000..3ec0c52bf06 --- /dev/null +++ b/ocaml/quicktest/trace/trace.ml @@ -0,0 +1,133 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module MyScope = Scope +open Opentelemetry +include Opentelemetry.Trace + +let ok = Ok () + +let add_event = `use_Scope + +let add_attrs = `use_Scope + +let no_scope = + MyScope.make + Scope. + {span_id= Span_id.dummy; trace_id= Trace_id.dummy; events= []; attrs= []} + Sampling.DROP + +(* workaround for old Opentelemetry version, newer versions support this + directly *) +let set_status_from_result scope result = + if Option.is_none @@ MyScope.status scope then + MyScope.set_status scope + @@ + match result with + | Ok () -> + Span_status.(make ~code:Status_code_ok ~message:"") + | Error (message, _) -> + Span_status.(make ~code:Status_code_error ~message) + +let trace_result_of scope result = + match (result, MyScope.status scope) with + | Ok (), Some Span_status.{code= Status_code_error; message} -> + Error (message, Printexc.get_callstack 0) + | _ -> + result + +let with_' ?(force_new_trace_id = false) ?trace_state ?service_name ?attrs ?kind + ?trace_id ?parent ?scope ?links name f = + let scope = MyScope.get_ambient_scope ?scope () in + let parent_scope = + if force_new_trace_id then + None + else + scope + in + let sampling = + Sampler.should_sample ?parent_scope ?trace_id ?kind ?attrs ?links + ?trace_state name + in + let my_scope = ref no_scope in + + let f oscope = + let scope = MyScope.make oscope sampling.decision in + my_scope := scope ; + MyScope.with_ambient_scope scope @@ fun () -> + if MyScope.is_recording scope then SpanProcessor.on_start scope ; + MyScope.add_attrs scope (fun () -> sampling.attrs) ; + try f scope + with e -> + let bt = Printexc.get_raw_backtrace () in + Backtrace.is_important e ; + MyScope.add_attrs scope (fun () -> + [ + ( "exception.stacktrace" + , `String (e |> Backtrace.get |> Backtrace.to_string_hum) + ) + ] + ) ; + Printexc.raise_with_backtrace e bt + in + + let thunk, finally = + with_' ~force_new_trace_id ?trace_state:sampling.trace_state ?service_name + ?kind ?trace_id ?parent + ?scope:Option.(map MyScope.scope scope) + ?links name f + in + + let finally result = + let scope = !my_scope in + set_status_from_result scope result ; + MyScope.finish_span scope ; + if MyScope.is_recording scope then SpanProcessor.on_end scope ; + + if MyScope.is_sampled scope then + (* only send sampled spans to the backend, drop others *) + trace_result_of scope result |> finally ; + (* emit span first, logs and metrics after, to retain + order when printed to console *) + MyScope.finish_logs_metrics scope + in + (thunk, finally) + +let with_ ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind ?trace_id + ?parent ?scope ?links name f = + let thunk, finally = + with_' ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind ?trace_id + ?parent ?scope ?links name f + in + try + let r = thunk () in + finally ok ; r + with exn -> + let bt = Printexc.get_raw_backtrace () in + Backtrace.is_important exn ; + finally (Error (Printexc.to_string exn, bt)) ; + Printexc.raise_with_backtrace exn bt + +let with_result ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind + ?trace_id ?parent ?scope ?links error_to_string name f = + let thunk, finally = + with_' ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind ?trace_id + ?parent ?scope ?links name f + in + match thunk () with + | Ok _ as r -> + finally ok ; r + | Error e as r -> + finally (Error (error_to_string e, Printexc.get_callstack 0)) ; + r diff --git a/ocaml/quicktest/trace/trace.mli b/ocaml/quicktest/trace/trace.mli new file mode 100644 index 00000000000..d90fe8bf9eb --- /dev/null +++ b/ocaml/quicktest/trace/trace.mli @@ -0,0 +1,73 @@ +(* + * Copyright (c) Cloud Software Group, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +include module type of Opentelemetry.Trace + +val add_event : [`use_Scope] +(** [add_event] removed, deprecated upstream *) + +val add_attrs : [`use_Scope] +(** [add_attrs] removed, deprecated upstream *) + +val with_' : + ?force_new_trace_id:bool + -> ?trace_state:string + -> ?service_name:string + -> ?attrs:Opentelemetry.key_value list + -> ?kind:Opentelemetry.Span.kind + -> ?trace_id:Opentelemetry.Trace_id.t + -> ?parent:Opentelemetry.Span.id + -> ?scope:Scope.t + -> ?links:Opentelemetry.Span_link.t list + -> string + -> (Scope.t -> 'a) + -> (unit -> 'a) * ((unit, string * Printexc.raw_backtrace) result -> unit) +(** [with_'] is like {!val:Opentelemetry.Trace.with'}, but with a custom {!module:Scope}. + This scope supports metrics and logs, and (tail) sampling. +*) + +val with_ : + ?force_new_trace_id:bool + -> ?trace_state:string + -> ?service_name:string + -> ?attrs:Opentelemetry.key_value list + -> ?kind:Opentelemetry.Span.kind + -> ?trace_id:Opentelemetry.Trace_id.t + -> ?parent:Opentelemetry.Span.id + -> ?scope:Scope.t + -> ?links:Opentelemetry.Span_link.t list + -> string + -> (Scope.t -> 'a) + -> 'a +(** [with_] is like {!val:Opentelemetry.Trace.with_}, but with a custom {!module:Scope}. + This scope supports metrics and logs, and (tail) sampling. +*) + +val with_result : + ?force_new_trace_id:bool + -> ?trace_state:string + -> ?service_name:string + -> ?attrs:Opentelemetry.key_value list + -> ?kind:Opentelemetry.Span.kind + -> ?trace_id:Opentelemetry.Trace_id.t + -> ?parent:Opentelemetry.Span.id + -> ?scope:Scope.t + -> ?links:Opentelemetry.Span_link.t list + -> ('b -> string) + -> string + -> (Scope.t -> ('a, 'b) result) + -> ('a, 'b) result +(** [with_] is like {!val:with_}, but it considers spans failed when they + return an error result, not just when they raise an exception. +*) diff --git a/ocaml/tests/bench/bench_backtrace.ml b/ocaml/tests/bench/bench_backtrace.ml new file mode 100644 index 00000000000..bad0bb9773f --- /dev/null +++ b/ocaml/tests/bench/bench_backtrace.ml @@ -0,0 +1,34 @@ +open Bechamel + +let finally () = Sys.opaque_identity () + +let fun_protect f = Fun.protect ~finally f + +let stdext_finally f = Xapi_stdext_pervasives.Pervasiveext.finally f finally + +exception Test + +let raise_local () = raise Test + +let test wrapper f () = try wrapper (Sys.opaque_identity f) with Test -> () + +let test_backtrace_is_important () = + try (Sys.opaque_identity raise_local) () with e -> Backtrace.is_important e + +let benchmarks = + [ + Test.make ~name:"Fun.protect noop" (Staged.stage @@ test fun_protect ignore) + ; Test.make ~name:"finally noop" (Staged.stage @@ test stdext_finally ignore) + ; Test.make ~name:"Fun.protect raise" + (Staged.stage @@ test fun_protect raise_local) + ; Test.make ~name:"finally raise" + (Staged.stage @@ test stdext_finally raise_local) + ; Test.make ~name:"Backtrace.is_important" + (Staged.stage @@ test_backtrace_is_important) + ] + +let () = + Printexc.record_backtrace true ; + () + |> Debug.with_thread_associated "main" @@ fun () -> + Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune index a61bafa186c..e7cd339cdd3 100644 --- a/ocaml/tests/bench/dune +++ b/ocaml/tests/bench/dune @@ -5,6 +5,7 @@ bench_throttle2 bench_cached_reads bench_vdi_allowed_operations + bench_backtrace bench_pool_field) (libraries dune-build-info @@ -26,7 +27,9 @@ tests_common log unix + backtrace xapi_database xapi_datamodel xapi_internal + xapi-stdext-pervasives xapi-stdext-threads)) diff --git a/ocaml/tests/dune b/ocaml/tests/dune index f829e72c88c..744d09f53c4 100644 --- a/ocaml/tests/dune +++ b/ocaml/tests/dune @@ -32,7 +32,7 @@ threads.posix uuid unix - xapi-backtrace + backtrace xapi-consts xapi-datamodel xapi-idl diff --git a/ocaml/xapi-cli-server/dune b/ocaml/xapi-cli-server/dune index b3c014ec56f..cbbc3048b69 100644 --- a/ocaml/xapi-cli-server/dune +++ b/ocaml/xapi-cli-server/dune @@ -34,7 +34,7 @@ tar threads.posix unix - xapi-backtrace + backtrace xapi-consts xapi_version xapi_database diff --git a/ocaml/xapi-client/tasks.ml b/ocaml/xapi-client/tasks.ml index a9da21890ec..abfbb503025 100644 --- a/ocaml/xapi-client/tasks.ml +++ b/ocaml/xapi-client/tasks.ml @@ -23,7 +23,8 @@ module TaskSet = Set.Make (struct end) (* Return once none of the tasks have a `pending status. *) -let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks ~callback = +let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks ~callback + ~on_progress = let classes = List.map (fun task -> Printf.sprintf "task/%s" (Ref.string_of task)) tasks in @@ -68,6 +69,7 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks ~callback = (fun (task_set', completed_task_count, _) record -> match record with | Event_helper.Task (t, Some t_rec) -> + on_progress t completed_task_count t_rec.API.task_progress ; if TaskSet.mem t task_set' && t_rec.API.task_status <> `pending @@ -107,12 +109,19 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks ~callback = let wait_for_all ~rpc ~session_id ~tasks = wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks - ~callback:(fun _ _ -> [] - ) + ~callback:(fun _ _ -> []) + ~on_progress:(fun _ _ _ -> ()) |> ignore let wait_for_all_with_callback ~rpc ~session_id ~tasks ~callback = wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks ~callback + ~on_progress:(fun _ _ _ -> () + ) + |> ignore + +let wait_for_all_with_progress ~rpc ~session_id ~tasks ~callback ~on_progress = + wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks ~callback + ~on_progress |> ignore let with_tasks_destroy ~rpc ~session_id ~timeout ~tasks = @@ -121,8 +130,8 @@ let with_tasks_destroy ~rpc ~session_id ~timeout ~tasks = if not (wait_for_all_inner ~rpc ~session_id ~all_timeout:(Some timeout) ~tasks - ~callback:(fun _ _ -> [] - ) + ~callback:(fun _ _ -> []) + ~on_progress:(fun _ _ _ -> ()) ) then ( D.info "Canceling tasks" ; @@ -134,8 +143,8 @@ let with_tasks_destroy ~rpc ~session_id ~timeout ~tasks = tasks ; (* cancel is not immediate, give it a reasonable chance to take effect *) wait_for_all_inner ~rpc ~session_id ~all_timeout:(Some 60.) ~tasks - ~callback:(fun _ _ -> [] - ) + ~callback:(fun _ _ -> []) + ~on_progress:(fun _ _ _ -> ()) |> ignore ; false ) else diff --git a/ocaml/xapi-client/tasks.mli b/ocaml/xapi-client/tasks.mli index a396c569aef..8fed9a50dbd 100644 --- a/ocaml/xapi-client/tasks.mli +++ b/ocaml/xapi-client/tasks.mli @@ -43,6 +43,18 @@ val wait_for_all_with_callback : and return an empty list. *) +val wait_for_all_with_progress : + rpc:(Rpc.call -> Rpc.response) + -> session_id:API.ref_session + -> tasks:API.ref_task list + -> callback:(int -> API.ref_task -> API.ref_task list) + -> on_progress:(API.ref_task -> int -> float -> unit) + -> unit +(** [wait_for_all_with_progress ~rpc ~session_id ~tasks ~callback ~on_progress] + is like {!val:wait_for_all_with_callback}, but also invokes [on_progress] + with the per-task progress every time there is an event on the task. +*) + val with_tasks_destroy : rpc:(Rpc.call -> Rpc.response) -> session_id:API.ref_session diff --git a/ocaml/xapi-guard/lib/dune b/ocaml/xapi-guard/lib/dune index cb6345496da..79d33ad4d66 100644 --- a/ocaml/xapi-guard/lib/dune +++ b/ocaml/xapi-guard/lib/dune @@ -48,7 +48,7 @@ unix uuidm uri - xapi-backtrace + backtrace xapi-consts xapi-idl xapi-idl.guard.privileged diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index d91ba09b2c4..be492c9762b 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -27,7 +27,7 @@ uri uuidm xapi_timeslice - xapi-backtrace + backtrace xapi-consts xapi-log xapi-open-uri @@ -57,7 +57,7 @@ tracing threads.posix unix - xapi-backtrace + backtrace xapi-idl xapi-log xapi-stdext-pervasives diff --git a/ocaml/xapi-idl/lib/task_server.ml b/ocaml/xapi-idl/lib/task_server.ml index 0053015387d..4a0a90e3ab5 100644 --- a/ocaml/xapi-idl/lib/task_server.ml +++ b/ocaml/xapi-idl/lib/task_server.ml @@ -207,6 +207,8 @@ functor try SMap.find id !(tasks.task_map) with _ -> raise (Interface.does_not_exist ("task", id)) + let backtrace_of t = t.backtrace + let to_interface_task t = { Interface.Task.id= t.id diff --git a/ocaml/xapi-idl/lib/task_server.mli b/ocaml/xapi-idl/lib/task_server.mli index 1f1433972de..d382de087dd 100644 --- a/ocaml/xapi-idl/lib/task_server.mli +++ b/ocaml/xapi-idl/lib/task_server.mli @@ -69,6 +69,8 @@ module Task : functor (Interface : INTERFACE) -> sig val to_interface_task : task_handle -> Interface.Task.t + val backtrace_of : task_handle -> Backtrace.t + (* [add tasks dbg f] adds a new task with debug string [dbg] that will execute [f] when run *) val add : diff --git a/ocaml/xapi-idl/lib/xcp_service.ml b/ocaml/xapi-idl/lib/xcp_service.ml index b973a26c2da..ae2b19dcd48 100644 --- a/ocaml/xapi-idl/lib/xcp_service.ml +++ b/ocaml/xapi-idl/lib/xcp_service.ml @@ -443,6 +443,7 @@ let configure_common ~options ~resources arg_parse_fn = (* Register the Logs reporter to ensure we get log messages from libraries using Logs *) Debug.init_logs () ; + let () = try Unix.gethostname () |> Debug.set_backtrace_name with _ -> () in let resources = default_resources @ resources in let config_spec = common_options @ options @ to_opt resources in (* It's very confusing if there are duplicate key names *) diff --git a/ocaml/xapi-storage-script/dune b/ocaml/xapi-storage-script/dune index 9f1f798df52..2ff8bb289b7 100644 --- a/ocaml/xapi-storage-script/dune +++ b/ocaml/xapi-storage-script/dune @@ -44,7 +44,7 @@ uri threads.posix unix - xapi-backtrace + backtrace xapi-consts xapi-consts.xapi_version xapi-idl diff --git a/ocaml/xapi/create_misc.ml b/ocaml/xapi/create_misc.ml index c5d3aa567b0..3c2d680d328 100644 --- a/ocaml/xapi/create_misc.ml +++ b/ocaml/xapi/create_misc.ml @@ -155,6 +155,7 @@ let read_localhost_info ~__context = try Some (Xapi_inventory.lookup k) with _ -> None in let this_host_name = Networking_info.get_hostname () in + Debug.set_backtrace_name this_host_name ; let open Xapi_inventory in let open Xenops_interface.Host in { diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 89e2c96a72e..16c3523139a 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -52,6 +52,13 @@ let create_localhost ~__context info = in (* me = None on firstboot only *) if me = None then + (* Restore the host.last_update_hash when this is an ejected host. *) + let last_update_hash = + let k = "last_update_hash" in + let x = Localdb.get k |> Option.value ~default:"" in + (try Localdb.del k with _ -> ()) ; + x + in let (_ : API.ref_host) = Xapi_host.create ~__context ~uuid:info.uuid ~name_label:info.hostname ~name_description:"" ~hostname:info.hostname ~address:ip @@ -59,7 +66,7 @@ let create_localhost ~__context info = ~external_auth_configuration:[] ~license_params:[] ~edition:"" ~license_server:[("address", "localhost"); ("port", "27000")] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false - ~last_software_update:Date.epoch ~last_update_hash:"" + ~last_software_update:Date.epoch ~last_update_hash ~ssh_enabled:Constants.default_ssh_enabled ~ssh_enabled_timeout:Constants.default_ssh_enabled_timeout ~ssh_expiry:Date.epoch diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 139bf4d0f09..085b678abde 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -95,7 +95,7 @@ xapi_aux xapi-stdext-std xapi-stdext-pervasives - xapi-backtrace + backtrace xapi-datamodel xapi-consts xapi_version @@ -196,7 +196,7 @@ uuidm x509 xapi_aux - xapi-backtrace + backtrace (re_export xapi-consts) xapi-consts.xapi_version xapi-client @@ -282,7 +282,7 @@ xapi-log xapi-stdext-encodings xapi-consts - xapi-backtrace + backtrace clock rpclib.json) (wrapped false)) @@ -306,7 +306,7 @@ tracing tracing_propagator unix - xapi-backtrace + backtrace xapi-client xapi-consts xapi-datamodel diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index e791dc72c3a..09a1f822d9c 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -126,6 +126,10 @@ let call_script ?(log_output = Always) ?env ?stdin ?timeout script args = (String.concat " " (filter_args args)) message stdout stderr ; raise e + | e -> + debug "%s: unexpected exception raised: %s" __FUNCTION__ + (ExnHelper.string_of_exn e) ; + raise e (** Construct a descriptive network name (used as name_label) for a give network interface. *) let choose_network_name_for_pif device = function diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index 6ec72d296ca..c768ee966a7 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -2076,57 +2076,37 @@ let handlers = ; (Datamodel_common._vtpm, VTPMHandler.handle) ] -(* If snapshot_of(x) = y, and both x and y are freshly imported VDIs, - the field must be rewritten to a real database reference. The same - relationship formed by parental links must be preserved as well. - - We can't do this on-the-fly during an import because we can't - guarantee that the VDIs will be imported in reverse topological - order of the relation induced by snapshot_of. *) -let update_vdi_links ~__context state = - (* Map an exported reference to a (freshly) imported reference, or - null if it's not present in the import. *) - let resolve_import = - let tbl = Hashtbl.create 16 in - let go (_, a, b) = Hashtbl.replace tbl a (Ref.of_string b) in - List.iter go state.table ; - fun r -> - Hashtbl.find_opt tbl (Ref.string_of r) |> Option.value ~default:Ref.null - in - let update x = - let x_r = Db.VDI.get_record ~__context ~self:x in - let parent = resolve_import x_r.API.vDI_parent in - let snapshot_of = resolve_import x_r.API.vDI_snapshot_of in - Db.VDI.set_parent ~__context ~self:x ~value:parent ; - Db.VDI.set_snapshot_of ~__context ~self:x ~value:snapshot_of ; - Db.VDI.set_is_a_snapshot ~__context ~self:x ~value:(snapshot_of <> Ref.null) - in - let go (cls, _, r) = - if cls = Datamodel_common._vdi then update (Ref.of_string r) - in - List.iter go state.table - -(* Same as [update_vdi_links] but over VMs instead. *) -let update_vm_links ~__context state = - let resolve_import = - let tbl = Hashtbl.create 16 in - let go (_, a, b) = Hashtbl.replace tbl a (Ref.of_string b) in - List.iter go state.table ; - fun r -> - Hashtbl.find_opt tbl (Ref.string_of r) |> Option.value ~default:Ref.null - in - let update x = - let x_r = Db.VM.get_record ~__context ~self:x in - let parent = resolve_import x_r.API.vM_parent in - let snapshot_of = resolve_import x_r.API.vM_snapshot_of in - Db.VM.set_parent ~__context ~self:x ~value:parent ; - Db.VM.set_snapshot_of ~__context ~self:x ~value:snapshot_of ; - Db.VM.set_is_a_snapshot ~__context ~self:x ~value:(snapshot_of <> Ref.null) - in - let go (cls, _, r) = - if cls = Datamodel_common._vm then update (Ref.of_string r) +let update_snapshot_and_parent_links ~__context state = + let aux (cls, _, ref) = + let ref = Ref.of_string ref in + ( if + cls = Datamodel_common._vm + && Db.VM.get_is_a_snapshot ~__context ~self:ref + then + let snapshot_of = Db.VM.get_snapshot_of ~__context ~self:ref in + if snapshot_of <> Ref.null then ( + debug "lookup for snapshot_of = '%s'" (Ref.string_of snapshot_of) ; + log_reraise + ("Failed to find the VM which is snapshot of " + ^ Db.VM.get_name_label ~__context ~self:ref + ) + (fun table -> + let snapshot_of = (lookup snapshot_of) table in + Db.VM.set_snapshot_of ~__context ~self:ref ~value:snapshot_of + ) + state.table + ) + ) ; + if cls = Datamodel_common._vm then ( + let parent = Db.VM.get_parent ~__context ~self:ref in + debug "lookup for parent = '%s'" (Ref.string_of parent) ; + try + let parent = lookup parent state.table in + Db.VM.set_parent ~__context ~self:ref ~value:parent + with _ -> debug "no parent found" + ) in - List.iter go state.table + List.iter aux state.table let check_references ~__context (table : table) = let is_export_reference r = @@ -2174,8 +2154,7 @@ let handle_all __context config rpc session_id (xs : obj list) = false in if not dry_run then ( - update_vm_links ~__context state ; - update_vdi_links ~__context state ; + update_snapshot_and_parent_links ~__context state ; check_references ~__context state.table ) ; state diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 83786bd8afd..49c1ebe2c96 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -141,7 +141,14 @@ let do_op_on_common ~local_fn ~__context ~host ~remote_fn f = local_fn ~__context else let task_opt = set_forwarding_on_task ~__context ~host in - f __context host task_opt remote_fn + try f __context host task_opt remote_fn + with Api_errors.Server_error (_, _) as e -> ( + match task_opt with + | None -> + raise e + | Some task -> + TaskHelper.reraise ~__context ~task e + ) with | ( Xmlrpc_client.Connection_reset | Http_client.Http_request_rejected _ diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 0fe9383c737..17657c752f6 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -89,11 +89,17 @@ let exec_with_context ~__context ~need_complete ?marshaller ?f_forward ?quiet f with | Api_errors.Server_error (a, _) as e when a = Api_errors.task_cancelled -> Backtrace.is_important e ; - if need_complete then TaskHelper.cancel ~__context ; + if need_complete then + TaskHelper.cancel ~__context + else + TaskHelper.store_backtrace ~__context e ; raise e | e -> Backtrace.is_important e ; - if need_complete then TaskHelper.failed ~__context e ; + if need_complete then + TaskHelper.failed ~__context e + else + TaskHelper.store_backtrace ~__context e ; raise e in let@ () = diff --git a/ocaml/xapi/static_vdis.ml b/ocaml/xapi/static_vdis.ml index 119db57e89c..67078a7561b 100644 --- a/ocaml/xapi/static_vdis.ml +++ b/ocaml/xapi/static_vdis.ml @@ -23,30 +23,26 @@ include Xapi_database.Static_vdis_list (** Generate the static configuration and attach the VDI now *) let permanent_vdi_attach ~__context ~vdi ~reason = - info "permanent_vdi_attach: vdi = %s; sr = %s" (Ref.string_of vdi) + let uuid = Db.VDI.get_uuid ~__context ~self:vdi in + info "permanent_vdi_attach: vdi = %s ( %s ); sr = %s" (Ref.string_of vdi) uuid (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi)) ; (* Disallow attaching VDIs that only have changed block tracking metadata *) if Db.VDI.get_type ~__context ~self:vdi = `cbt_metadata then ( error "Static_vdis.permanent_vdi_attach: the given VDI has type cbt_metadata" ; - raise - (Api_errors.Server_error - ( Api_errors.vdi_incompatible_type - , [Ref.string_of vdi; Record_util.vdi_type_to_string `cbt_metadata] - ) - ) + let param = + [Ref.string_of vdi; Record_util.vdi_type_to_string `cbt_metadata] + in + raise Api_errors.(Server_error (vdi_incompatible_type, param)) ) ; ignore (Helpers.call_script ~timeout:Mtime.Span.(1 * min) - !Xapi_globs.static_vdis - ["add"; Db.VDI.get_uuid ~__context ~self:vdi; reason] + !Xapi_globs.static_vdis ["add"; uuid; reason] ) ; (* VDI will be attached on next boot; attach it now too *) Xapi_stdext_std.Xstringext.String.rtrim - (Helpers.call_script !Xapi_globs.static_vdis - ["attach"; Db.VDI.get_uuid ~__context ~self:vdi] - ) + (Helpers.call_script !Xapi_globs.static_vdis ["attach"; uuid]) (** Detach the VDI (by reference) now and destroy the static configuration *) let permanent_vdi_detach ~__context ~vdi = diff --git a/ocaml/xapi/taskHelper.ml b/ocaml/xapi/taskHelper.ml index dca8c9e7ca0..1990ac2ce7a 100644 --- a/ocaml/xapi/taskHelper.ml +++ b/ocaml/xapi/taskHelper.ml @@ -262,6 +262,18 @@ let cancel ~__context = let@ self = operate_on_db_task ~__context in cancel_this ~__context ~self +let store_backtrace ~__context exn = + D.log_and_ignore_exn @@ fun () -> + let@ self = operate_on_db_task ~__context in + let status = Db_actions.DB_Action.Task.get_status ~__context ~self in + match status with + | `pending -> + (* store backtrace, for message-forwarding to read on the coordinator *) + Db_actions.DB_Action.Task.set_backtrace ~__context ~self + ~value:(Sexplib.Sexp.to_string Backtrace.(sexp_of_t (get exn))) + | _ -> + () + let failed ~__context exn = let backtrace = Printexc.get_raw_backtrace () in let@ () = finally_complete_tracing ~error:(exn, backtrace) __context in @@ -293,6 +305,24 @@ let failed ~__context exn = "`failure" ) +let reraise ~__context ~task exn = + Backtrace.is_important exn ; + let () = + D.log_and_ignore_exn @@ fun () -> + (* best-effort: retrieve existing backtrace and join with local *) + let remote_bt = + Db_actions.DB_Action.Task.get_backtrace ~__context ~self:task + |> Sexplib.Sexp.of_string + |> Backtrace.t_of_sexp + in + let local_bt = Backtrace.remove exn in + (* start with remote Backtrace *) + Backtrace.add exn remote_bt ; + (* add back local *) + Backtrace.add exn local_bt + in + raise exn + type id = Sm of string | Xenops of string * string (* queue name * id *) diff --git a/ocaml/xapi/taskHelper.mli b/ocaml/xapi/taskHelper.mli index 1c4d5381586..c1c7ff38504 100644 --- a/ocaml/xapi/taskHelper.mli +++ b/ocaml/xapi/taskHelper.mli @@ -55,9 +55,13 @@ val cancel_this : __context:Context.t -> self:API.ref_task -> unit val cancel : __context:Context.t -> unit +val store_backtrace : __context:Context.t -> exn -> unit + val failed : __context:Context.t -> exn -> unit (** Call this when a task fails with [exn] *) +val reraise : __context:Context.t -> task:API.ref_task -> exn -> 'a + val init : unit -> unit val rbac_assert_permission_fn : diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index ef542ec53a1..738627d14b7 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -24,6 +24,7 @@ open D module Rrdd = Rrd_client.Client module Date = Clock.Date module Delay = Xapi_stdext_threads.Threadext.Delay +module Listext = Xapi_stdext_std.Listext.List module Unixext = Xapi_stdext_unix.Unixext let finally = Xapi_stdext_pervasives.Pervasiveext.finally @@ -1274,34 +1275,32 @@ let attach_statefiles ~__context statevdis = (* First GC any existing statefiles: these are not needed any more *) info "Detaching any existing statefiles: these are not needed any more" ; Xha_statefile.detach_existing_statefiles ~__context ; - let paths = ref [] in - (let cur_vdi_str = ref "" in - try - List.iter - (fun vdi -> - cur_vdi_str := Ref.string_of vdi ; - info "Attempting to permanently attach statefile VDI: %s" - (Ref.string_of vdi) ; - paths := - Static_vdis.permanent_vdi_attach ~__context ~vdi - ~reason:Xha_statefile.reason - :: !paths - ) - statevdis - with e -> - error "Caught exception attaching statefile: %s" (ExnHelper.string_of_exn e) ; - List.iter - (fun vdi -> - Helpers.log_exn_continue - (Printf.sprintf "detaching statefile: %s" (Ref.string_of vdi)) - (fun () -> Static_vdis.permanent_vdi_detach ~__context ~vdi) - () - ) - statevdis ; - raise - (Api_errors.Server_error (Api_errors.vdi_not_available, [!cur_vdi_str])) - ) ; - !paths + let attach vdi = + let uuid = Db.VDI.get_uuid ~__context ~self:vdi in + info "Attempting to permanently attach statefile VDI: %s" uuid ; + try + let path = + Static_vdis.permanent_vdi_attach ~__context ~vdi + ~reason:Xha_statefile.reason + in + Ok (path, (vdi, uuid)) + with e -> Error (e, (vdi, uuid)) + in + let detach_after_error (vdi, uuid) = + Helpers.log_exn_continue + (Printf.sprintf "%s: statefile VDI %s" __FUNCTION__ uuid) + (fun () -> Static_vdis.permanent_vdi_detach ~__context ~vdi) + () + in + match Listext.try_map_collect attach statevdis with + | Ok paths_and_vdis -> + List.map fst paths_and_vdis + | Error (paths_and_vdis, (e, ((_, uuid_e) as vdi_e))) -> + error "%s: Unable to attach VDI %s. Reason: %s" __FUNCTION__ uuid_e + (ExnHelper.string_of_exn e) ; + let maybe_detachable = vdi_e :: List.map snd paths_and_vdis in + List.iter detach_after_error maybe_detachable ; + raise Api_errors.(Server_error (vdi_not_available, [uuid_e])) (** Attach the metadata VDI and return the resulting path in dom0 *) let attach_metadata_vdi ~__context vdi = diff --git a/ocaml/xapi/xapi_mgmt_iface.ml b/ocaml/xapi/xapi_mgmt_iface.ml index 3f055037157..1678f68eedb 100644 --- a/ocaml/xapi/xapi_mgmt_iface.ml +++ b/ocaml/xapi/xapi_mgmt_iface.ml @@ -245,6 +245,7 @@ let on_dom0_networking_change ~__context = let localhost = Helpers.get_localhost ~__context in if Db.Host.get_hostname ~__context ~self:localhost <> new_hostname then ( debug "Changing Host.hostname in database to: %s" new_hostname ; + Debug.set_backtrace_name new_hostname ; Db.Host.set_hostname ~__context ~self:localhost ~value:new_hostname ) ; if diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 5214fb3998a..2901828c0c0 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -1963,6 +1963,7 @@ let eject_self ~__context ~host = (Db.Pool.get_current_operations ~__context ~self:pool) then raise Api_errors.(Server_error (not_supported_during_upgrade, [])) ; + let last_update_hash = Db.Host.get_last_update_hash ~__context ~self:host in if Pool_role.is_master () then raise Cannot_eject_master else (* Fail the operation if any VMs are running here (except dom0) *) @@ -2093,6 +2094,8 @@ let eject_self ~__context ~host = pif.API.pIF_ipv6_configuration_mode |> String.uncapitalize_ascii in + Localdb.put "last_update_hash" last_update_hash ; + debug "Saved last_update_hash %S to localdb." last_update_hash ; let write_first_boot_management_interface_configuration_file () = (* During firstboot, now inventory has an empty MANAGEMENT_INTERFACE *) let bridge = "" in @@ -3705,8 +3708,16 @@ let set_repositories ~__context ~self ~value = ) value ; Db.Pool.set_repositories ~__context ~self ~value ; - if Db.Pool.get_repositories ~__context ~self = [] then + if Db.Pool.get_repositories ~__context ~self = [] then ( Db.Pool.set_last_update_sync ~__context ~self ~value:Date.epoch ; + (* The host.latest_synced_updates_applied can't be refreshed by + pool.sync_updates. So reset it here. *) + Db.Host.get_all ~__context + |> List.iter (fun h -> + Db.Host.set_latest_synced_updates_applied ~__context ~self:h + ~value:`unknown + ) + ) ; disable_unsupported_periodic_sync_updates ~__context ~self ~repos:value let add_repository ~__context ~self ~value = diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index fe861ab6c43..1fa6fd8ff8c 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -3446,19 +3446,17 @@ let events_from_xapi () = if resident_here then Xenopsd_metadata.update ~__context ~self:vm |> ignore - with e -> - if not (Db.is_valid_ref __context vm) then + with + | e when not (Db.is_valid_ref __context vm) -> debug "VM %s has been removed: event on it will be \ - ignored" - (Ref.string_of vm) - else ( + ignored: %S" + (Ref.string_of vm) (Printexc.to_string e) + | e -> error "Caught %s while processing XenAPI event for VM \ %s" - (Printexc.to_string e) (Ref.string_of vm) ; - raise e - ) + (Printexc.to_string e) (Ref.string_of vm) ) | {ty= "host"; reference= t; _} when t = localhost' -> debug diff --git a/ocaml/xcp-rrdd/bin/rrdd/dune b/ocaml/xcp-rrdd/bin/rrdd/dune index 5dc5bcd1b3b..feefd301be8 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/dune +++ b/ocaml/xcp-rrdd/bin/rrdd/dune @@ -19,7 +19,7 @@ threads.posix unix uuid - xapi-backtrace + backtrace xapi-consts xapi-idl.network xapi-idl.rrd @@ -57,7 +57,7 @@ threads.posix unix uuid - xapi-backtrace + backtrace xapi-consts.xapi_version xapi-idl xapi-idl.network diff --git a/ocaml/xe-cli/dune b/ocaml/xe-cli/dune index b61ec3cde63..e956ce94ee5 100644 --- a/ocaml/xe-cli/dune +++ b/ocaml/xe-cli/dune @@ -14,7 +14,7 @@ unix uri yojson - xapi-backtrace + backtrace xapi-cli-protocol xapi-stdext-pervasives xapi-stdext-std diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index d3801af105e..fa95520059b 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -75,7 +75,7 @@ exception Usage let usage () = let help = Printf.sprintf - {|Usage: + {|Usage: %s [ -s ] XenServer host [ -p ] XenServer port number @@ -84,7 +84,9 @@ let usage () = [ --nossl ] Disable SSL/TLS [ --debug ] Enable debug output [ --debug-on-fail ] Enable debug output only on failure + [ --minimal ] Output only values in a comma-separated lists [ --traceparent ] Distributed tracing context + [ --trace ] Show task stacktrace on failure [ ... ] Command-specific options A full list of commands can be obtained by running diff --git a/ocaml/xenopsd/lib/dune b/ocaml/xenopsd/lib/dune index dbd77e9a5b1..8b211442f59 100644 --- a/ocaml/xenopsd/lib/dune +++ b/ocaml/xenopsd/lib/dune @@ -27,7 +27,7 @@ uuidm uutf threads.posix - xapi-backtrace + backtrace xapi_version xapi-idl xapi-idl.storage diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 3196e949a91..3db369d0d57 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -2022,6 +2022,9 @@ and parallel_atomic ~progress_callback ~description ~nested atoms t = | Some (Task.Completed _) -> TASK.destroy' id ; None | Some (Task.Failed e) -> + let backtrace = + Xenops_task.backtrace_of (Xenops_task.handle_of_id tasks id) + in TASK.destroy' id ; let e = match Rpcmarshal.unmarshal Errors.error.Rpc.Types.ty e with @@ -2030,7 +2033,7 @@ and parallel_atomic ~progress_callback ~description ~nested atoms t = | Error (`Msg x) -> internal_error "Error unmarshalling failure: %s" x in - Some e + Backtrace.add e backtrace ; Some e | None | Some (Task.Pending _) -> (* Because pending tasks are filtered out in queue_atomics_and_wait with task_ended the second case will @@ -2148,12 +2151,14 @@ let rec immediate_operation dbg _id op = | Task.Completed _ -> () | Task.Failed e -> ( - match Rpcmarshal.unmarshal Errors.error.Rpc.Types.ty e with - | Ok e -> - raise (Xenopsd_error e) - | Error (`Msg m) -> - internal_error "Failed to unmarshal error: %s" m - ) + let backtrace = Xenops_task.backtrace_of task in + match Rpcmarshal.unmarshal Errors.error.Rpc.Types.ty e with + | Ok e -> + let e = Xenopsd_error e in + Backtrace.add e backtrace ; raise e + | Error (`Msg m) -> + internal_error "Failed to unmarshal error: %s" m + ) (* At all times we ensure that an operation which partially fails leaves the system in a recoverable state. All that should be necessary is to call the diff --git a/ocaml/xenopsd/xc/dune b/ocaml/xenopsd/xc/dune index 2c10982df03..818e6ef203a 100644 --- a/ocaml/xenopsd/xc/dune +++ b/ocaml/xenopsd/xc/dune @@ -36,7 +36,7 @@ uri uuid uuidm - xapi-backtrace + backtrace xapi-idl xapi-idl.memory xapi-idl.network diff --git a/opam/forkexec.opam b/opam/forkexec.opam index cf43e8d76ae..f4e1f7377cc 100644 --- a/opam/forkexec.opam +++ b/opam/forkexec.opam @@ -15,7 +15,6 @@ depends: [ "ppx_deriving_rpc" "rpclib" "uuid" {= version} - "xapi-backtrace" "xapi-log" {= version} "xapi-stdext-pervasives" {= version} "xapi-stdext-unix" {= version} diff --git a/opam/http-lib.opam b/opam/http-lib.opam index 900b5593c01..cde78b82bb3 100644 --- a/opam/http-lib.opam +++ b/opam/http-lib.opam @@ -26,7 +26,6 @@ depends: [ "tgroup" "uri" "uuid" {= version} - "xapi-backtrace" "xapi-idl" {= version} "xapi-log" {= version} "xapi-stdext-pervasives" {= version} diff --git a/opam/safe-resources.opam b/opam/safe-resources.opam index e4bd199a640..34ae19ca32e 100644 --- a/opam/safe-resources.opam +++ b/opam/safe-resources.opam @@ -17,7 +17,6 @@ depends: [ "dune" {>= "3.15"} "fmt" "logs" - "xapi-backtrace" "xapi-stdext-pervasives" "xapi-stdext-threads" "alcotest" {with-test} diff --git a/opam/safe-resources.opam.template b/opam/safe-resources.opam.template index ae64f0c2d53..8e0c6e362d2 100644 --- a/opam/safe-resources.opam.template +++ b/opam/safe-resources.opam.template @@ -14,7 +14,6 @@ depends: [ "dune" {>= "3.15"} "fmt" "logs" - "xapi-backtrace" "xapi-stdext-pervasives" "xapi-stdext-threads" "alcotest" {with-test} diff --git a/opam/xapi-datamodel.opam b/opam/xapi-datamodel.opam index e2da59cee81..8a71ecdb2d9 100644 --- a/opam/xapi-datamodel.opam +++ b/opam/xapi-datamodel.opam @@ -20,7 +20,6 @@ depends: [ "rpclib" "base-threads" "sexplib0" - "xapi-backtrace" "xapi-consts" "xapi-schema" "clock" diff --git a/opam/xapi-datamodel.opam.template b/opam/xapi-datamodel.opam.template index e7bd9361b7e..0c358325eee 100644 --- a/opam/xapi-datamodel.opam.template +++ b/opam/xapi-datamodel.opam.template @@ -17,7 +17,6 @@ depends: [ "rpclib" "base-threads" "sexplib0" - "xapi-backtrace" "xapi-consts" "xapi-schema" "clock" diff --git a/opam/xapi-debug.opam b/opam/xapi-debug.opam index 26871b97a7e..9febe70fb5f 100644 --- a/opam/xapi-debug.opam +++ b/opam/xapi-debug.opam @@ -53,7 +53,6 @@ depends: [ "uuidm" "uutf" "x509" - "xapi-backtrace" "xapi-log" "xapi-types" "xapi-stdext-pervasives" diff --git a/opam/xapi-idl.opam b/opam/xapi-idl.opam index ad3a9e5da67..82e97cefe20 100644 --- a/opam/xapi-idl.opam +++ b/opam/xapi-idl.opam @@ -38,7 +38,6 @@ depends: [ "sexplib0" "uri" "uuidm" - "xapi-backtrace" "xapi-open-uri" "xapi-rrd" "clock" diff --git a/opam/xapi-idl.opam.template b/opam/xapi-idl.opam.template index d6d5deb53fa..343ac46ded9 100644 --- a/opam/xapi-idl.opam.template +++ b/opam/xapi-idl.opam.template @@ -35,7 +35,6 @@ depends: [ "sexplib0" "uri" "uuidm" - "xapi-backtrace" "xapi-open-uri" "xapi-rrd" "clock" diff --git a/opam/xapi-log.opam b/opam/xapi-log.opam index 4c77faffce0..63e2e0f1b54 100644 --- a/opam/xapi-log.opam +++ b/opam/xapi-log.opam @@ -14,7 +14,6 @@ depends: [ "fmt" "logs" "mtime" - "xapi-backtrace" "xapi-stdext-pervasives" {= version} "odoc" {with-doc} ] diff --git a/opam/xapi-open-uri.opam b/opam/xapi-open-uri.opam index 0823c9c1059..daaa84fa80a 100644 --- a/opam/xapi-open-uri.opam +++ b/opam/xapi-open-uri.opam @@ -18,7 +18,6 @@ depends: [ "dune" {>= "3.15"} "stunnel" "uri" - "xapi-backtrace" "xapi-stdext-pervasives" ] synopsis: "Library required by xapi" diff --git a/opam/xapi-open-uri.opam.template b/opam/xapi-open-uri.opam.template index 4e3ec18d413..060358ea9bc 100644 --- a/opam/xapi-open-uri.opam.template +++ b/opam/xapi-open-uri.opam.template @@ -15,7 +15,6 @@ depends: [ "dune" {>= "3.15"} "stunnel" "uri" - "xapi-backtrace" "xapi-stdext-pervasives" ] synopsis: "Library required by xapi" diff --git a/opam/xapi-stdext-pervasives.opam b/opam/xapi-stdext-pervasives.opam index 8e8ee0c71f7..45483be4373 100644 --- a/opam/xapi-stdext-pervasives.opam +++ b/opam/xapi-stdext-pervasives.opam @@ -11,7 +11,6 @@ depends: [ "ocaml" {>= "4.08"} "logs" "odoc" {with-doc} - "xapi-backtrace" ] build: [ ["dune" "subst"] {dev} diff --git a/opam/xapi-stdext-unix.opam b/opam/xapi-stdext-unix.opam index 63c36e4af94..637b8e01acb 100644 --- a/opam/xapi-stdext-unix.opam +++ b/opam/xapi-stdext-unix.opam @@ -20,7 +20,6 @@ depends: [ "logs" {with-test} "qcheck-core" {>= "0.21.2" & with-test} "odoc" {with-doc} - "xapi-backtrace" "unix-errno" "xapi-stdext-pervasives" {= version} "polly" diff --git a/opam/xapi.opam b/opam/xapi.opam index 34dd9af3b0d..1c0f62a6cee 100644 --- a/opam/xapi.opam +++ b/opam/xapi.opam @@ -70,7 +70,6 @@ depends: [ "uutf" "uuidm" "x509" - "xapi-backtrace" "xapi-client" {= version} "xapi-cli-protocol" {= version} "xapi-consts" {= version} diff --git a/opam/xe.opam b/opam/xe.opam index 81da07a0899..19033b062c1 100644 --- a/opam/xe.opam +++ b/opam/xe.opam @@ -19,7 +19,6 @@ depends: [ "fpath" "stunnel" "uri" - "xapi-backtrace" "xapi-cli-protocol" "xapi-consts" "xapi-datamodel" diff --git a/opam/xe.opam.template b/opam/xe.opam.template index fb95826fa60..929fd124c7f 100644 --- a/opam/xe.opam.template +++ b/opam/xe.opam.template @@ -16,7 +16,6 @@ depends: [ "fpath" "stunnel" "uri" - "xapi-backtrace" "xapi-cli-protocol" "xapi-consts" "xapi-datamodel" diff --git a/quality-gate.sh b/quality-gate.sh index 8778b15a03f..33d80936dac 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=239 + N=241 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" @@ -44,7 +44,7 @@ mli-files () { } structural-equality () { - N=7 + N=8 EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$EQ" -eq "$N" ]; then echo "OK counted $EQ usages of ' == '" diff --git a/unixpwd/c/unixpwd_stubs.c b/unixpwd/c/unixpwd_stubs.c index 00ef3b10889..7b7046940d7 100644 --- a/unixpwd/c/unixpwd_stubs.c +++ b/unixpwd/c/unixpwd_stubs.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "unixpwd.h" @@ -33,7 +34,9 @@ caml_unixpwd_getpwd(value caml_user) CAMLlocal1(pw); user = String_val(caml_user); + caml_release_runtime_system(); passwd = unixpwd_getpwd(user); + caml_acquire_runtime_system(); if (passwd == NULL && errno != 0) caml_failwith(strerror(errno)); if (passwd == NULL) @@ -53,7 +56,9 @@ caml_unixpwd_getspw(value caml_user) CAMLlocal1(pw); user = String_val(caml_user); + caml_release_runtime_system(); passwd = unixpwd_getspw(user); + caml_acquire_runtime_system(); if (passwd == NULL && errno != 0) caml_failwith(strerror(errno)); if (passwd == NULL) @@ -75,7 +80,9 @@ caml_unixpwd_get(value caml_user) CAMLlocal1(pw); user = String_val(caml_user); + caml_release_runtime_system(); passwd = unixpwd_get(user); + caml_acquire_runtime_system(); if (passwd == NULL && errno != 0) caml_failwith(strerror(errno)); if (passwd == NULL) @@ -96,7 +103,11 @@ caml_unixpwd_setpwd(value caml_user, value caml_password) user = String_val(caml_user); password = caml_stat_strdup(String_val(caml_password)); + + caml_release_runtime_system(); rc = unixpwd_setpwd(user, password); + caml_acquire_runtime_system(); + caml_stat_free(password); if (rc != 0) caml_failwith(strerror(rc)); @@ -113,7 +124,9 @@ caml_unixpwd_setspw(value caml_user, value caml_password) user = String_val(caml_user); password = caml_stat_strdup(String_val(caml_password)); + caml_release_runtime_system(); rc = unixpwd_setspw(user, password); + caml_acquire_runtime_system(); caml_stat_free(password); if (rc != 0) caml_failwith(strerror(rc));