Skip to content

Commit ec70db5

Browse files
authored
CP-53470 Additional traces around xenopsd VM_migrate & rejoin traceparents in requests forwarded to xenopsd by xapi (#6275)
Reconnected VM_receive_memory span to VM.migrate: When xapi passes the traceparent header to xenopsd it is serialised under the additional_headers field of the json body, updated xenopsd to expect that instead of sourcing it from the non existent traceparent field Added some additional spans around vm migration critical section including the specific point we receive the suspend from emu-manager/xenguest - this required moving the with_tracing function to a more common location
2 parents e8a34e9 + 6587b75 commit ec70db5

File tree

8 files changed

+76
-45
lines changed

8 files changed

+76
-45
lines changed

ocaml/xenopsd/lib/xenops_migrate.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ module Forwarded_http_request = struct
9595
type t = {
9696
uri: string
9797
; query: (string * string) list
98-
; traceparent: string option
98+
; additional_headers: (string * string) list
9999
; cookie: (string * string) list
100100
; body: string option
101101
}

ocaml/xenopsd/lib/xenops_server.ml

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,28 +1835,6 @@ let rec atomics_of_operation = function
18351835
| _ ->
18361836
[]
18371837

1838-
let with_tracing ~name ~task f =
1839-
let open Tracing in
1840-
let parent = Xenops_task.tracing task in
1841-
let tracer = Tracer.get_tracer ~name in
1842-
match Tracer.start ~tracer ~name ~parent () with
1843-
| Ok span -> (
1844-
Xenops_task.set_tracing task span ;
1845-
try
1846-
let result = f () in
1847-
ignore @@ Tracer.finish span ;
1848-
Xenops_task.set_tracing task parent ;
1849-
result
1850-
with exn ->
1851-
let backtrace = Printexc.get_raw_backtrace () in
1852-
let error = (exn, backtrace) in
1853-
ignore @@ Tracer.finish span ~error ;
1854-
raise exn
1855-
)
1856-
| Error e ->
1857-
warn "Failed to start tracing: %s" (Printexc.to_string e) ;
1858-
f ()
1859-
18601838
let rec perform_atomic ~progress_callback ?result (op : atomic)
18611839
(t : Xenops_task.task_handle) : unit =
18621840
let module B = (val get_backend () : S) in

ocaml/xenopsd/lib/xenops_task.ml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
open Xenops_utils
22

3+
module D = Debug.Make (struct let name = __MODULE__ end)
4+
35
module XI = struct
46
include Xenops_interface
57

@@ -89,3 +91,25 @@ let traceparent_header_of_task t =
8991
)
9092
(Xenops_task.tracing t)
9193
|> Option.to_list
94+
95+
let with_tracing ~name ~task f =
96+
let open Tracing in
97+
let parent = Xenops_task.tracing task in
98+
let tracer = Tracer.get_tracer ~name in
99+
match Tracer.start ~tracer ~name ~parent () with
100+
| Ok span -> (
101+
Xenops_task.set_tracing task span ;
102+
try
103+
let result = f () in
104+
let _ : (Span.t option, exn) result = Tracer.finish span in
105+
Xenops_task.set_tracing task parent ;
106+
result
107+
with exn ->
108+
let backtrace = Printexc.get_raw_backtrace () in
109+
let error = (exn, backtrace) in
110+
let _ : (Span.t option, exn) result = Tracer.finish span ~error in
111+
raise exn
112+
)
113+
| Error e ->
114+
D.warn "Failed to start tracing: %s" (Printexc.to_string e) ;
115+
f ()

ocaml/xenopsd/lib/xenopsd.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,8 @@ let handle_received_fd this_connection =
356356
let context = {Xenops_server.transferred_fd= Some received_fd} in
357357
let uri = Uri.of_string req.Xenops_migrate.Forwarded_http_request.uri in
358358
let traceparent =
359-
req.Xenops_migrate.Forwarded_http_request.traceparent
359+
List.assoc_opt "traceparent"
360+
req.Xenops_migrate.Forwarded_http_request.additional_headers
360361
in
361362
fn uri req.Xenops_migrate.Forwarded_http_request.cookie traceparent
362363
this_connection context

ocaml/xenopsd/xc/device.ml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2501,6 +2501,7 @@ module Backend = struct
25012501
let assert_can_suspend ~xs:_ _ = ()
25022502

25032503
let suspend (task : Xenops_task.task_handle) ~xs ~qemu_domid domid =
2504+
with_tracing ~task ~name:"Qemu_none.Dm.suspend" @@ fun () ->
25042505
Dm_Common.signal task ~xs ~qemu_domid ~domid "save" ~wait_for:"paused"
25052506

25062507
let stop ~xs:_ ~qemu_domid:_ ~vtpm:_ _ = ()
@@ -3180,7 +3181,8 @@ module Backend = struct
31803181

31813182
(* key not present *)
31823183

3183-
let suspend (_ : Xenops_task.task_handle) ~xs:_ ~qemu_domid:_ domid =
3184+
let suspend (task : Xenops_task.task_handle) ~xs:_ ~qemu_domid:_ domid =
3185+
with_tracing ~task ~name:"Qemu_upstream_compat.Dm.suspend" @@ fun () ->
31843186
let as_msg cmd = Qmp.(Success (Some __LOC__, cmd)) in
31853187
let perms = [Unix.O_WRONLY; Unix.O_CREAT] in
31863188
let save_file = sprintf qemu_save_path domid in
@@ -3860,7 +3862,8 @@ module Dm = struct
38603862
debug "Called Dm.restore_vgpu" ;
38613863
start_vgpu ~xc ~xs task ~restore:true domid vgpus vcpus profile
38623864

3863-
let suspend_varstored (_ : Xenops_task.task_handle) ~xs domid ~vm_uuid =
3865+
let suspend_varstored (task : Xenops_task.task_handle) ~xs domid ~vm_uuid =
3866+
with_tracing ~task ~name:"Dm.suspend_varstored" @@ fun () ->
38643867
debug "Called Dm.suspend_varstored (domid=%d)" domid ;
38653868
Service.Varstored.stop ~xs domid ;
38663869
Xenops_sandbox.Varstore_guard.read ~domid efivars_save_path ~vm_uuid
@@ -3877,6 +3880,7 @@ module Dm = struct
38773880
debug "Wrote EFI variables to %s (domid=%d)" path domid
38783881

38793882
let suspend_vtpm (task : Xenops_task.task_handle) ~xs domid ~vtpm =
3883+
with_tracing ~task ~name:"Dm.suspend_vtpm" @@ fun () ->
38803884
debug "Called Dm.suspend_vtpm (domid=%d)" domid ;
38813885
let dbg = Xenops_task.get_dbg task in
38823886
Option.map

ocaml/xenopsd/xc/domain.ml

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,22 +1703,23 @@ let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xc:_ ~xs ~domain_type
17031703
(string_of_message message) ;
17041704
match message with
17051705
| Suspend ->
1706-
do_suspend_callback () ;
1707-
if domain_type = `hvm then (
1708-
let vm_uuid = Uuidx.to_string uuid in
1709-
debug "VM = %s; domid = %d; suspending qemu-dm" vm_uuid domid ;
1710-
Device.Dm.suspend task ~xs ~qemu_domid ~dm domid ;
1711-
if is_uefi then
1712-
let (_ : string) =
1713-
Device.Dm.suspend_varstored task ~xs domid ~vm_uuid
1714-
in
1715-
let (_ : string list) =
1716-
Device.Dm.suspend_vtpm task ~xs domid ~vtpm
1717-
in
1718-
()
1706+
( with_tracing ~task ~name:"suspend_emu_manager Suspend" @@ fun () ->
1707+
do_suspend_callback () ;
1708+
if domain_type = `hvm then (
1709+
let vm_uuid = Uuidx.to_string uuid in
1710+
debug "VM = %s; domid = %d; suspending qemu-dm" vm_uuid domid ;
1711+
Device.Dm.suspend task ~xs ~qemu_domid ~dm domid ;
1712+
if is_uefi then
1713+
let (_ : string) =
1714+
Device.Dm.suspend_varstored task ~xs domid ~vm_uuid
1715+
in
1716+
let (_ : string list) =
1717+
Device.Dm.suspend_vtpm task ~xs domid ~vtpm
1718+
in
1719+
()
1720+
)
17191721
) ;
1720-
send_done cnx ;
1721-
wait_for_message ()
1722+
send_done cnx ; wait_for_message ()
17221723
| Prepare x when x = "xenguest" ->
17231724
debug "Writing Libxc header" ;
17241725
write_header main_fd (Libxc, 0L) >>= fun () ->
@@ -1741,6 +1742,7 @@ let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xc:_ ~xs ~domain_type
17411742
)
17421743
)
17431744
| Result _ ->
1745+
with_tracing ~task ~name:"suspend_emu_manager Result" @@ fun () ->
17441746
debug "VM = %s; domid = %d; emu-manager completed successfully"
17451747
(Uuidx.to_string uuid) domid ;
17461748
return ()

ocaml/xenopsd/xc/dune

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
xapi-stdext-std
5252
xapi-stdext-threads
5353
xapi-stdext-unix
54+
xapi-tracing
55+
xapi-tracing-export
5456
xapi_xenopsd
5557
xapi_xenopsd_c_stubs
5658
xapi_xenopsd_xc_c_stubs

ocaml/xenopsd/xc/xenops_server_xen.ml

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,15 +2499,35 @@ module VM = struct
24992499
None
25002500
in
25012501
let manager_path = choose_emu_manager vm.Vm.platformdata in
2502+
with_tracing ~task ~name:"VM_save_domain_suspend" @@ fun () ->
25022503
Domain.suspend task ~xc ~xs ~domain_type ~dm:(dm_of ~vm)
25032504
~vtpm:(vtpm_of ~vm) ~progress_callback ~qemu_domid ~manager_path
25042505
~is_uefi vm_str domid fd vgpu_fd flags' (fun () ->
2506+
with_tracing ~task ~name:"VM_save_domain_suspend_callback"
2507+
@@ fun () ->
25052508
(* SCTX-2558: wait more for ballooning if needed *)
2506-
wait_ballooning task vm ;
2507-
pre_suspend_callback task ;
2508-
if not (request_shutdown task vm Suspend 30.) then
2509+
( with_tracing ~task ~name:"VM_save_wait_ballooning" @@ fun () ->
2510+
wait_ballooning task vm
2511+
) ;
2512+
( with_tracing ~task ~name:"VM_save_pre_suspend_callback"
2513+
@@ fun () -> pre_suspend_callback task
2514+
) ;
2515+
2516+
if
2517+
not
2518+
( with_tracing ~task
2519+
~name:"VM_save_domain_suspend_callback_request_shutdown"
2520+
@@ fun () -> request_shutdown task vm Suspend 30.
2521+
)
2522+
then
25092523
raise (Xenopsd_error Failed_to_acknowledge_suspend_request) ;
2510-
if not (wait_shutdown task vm Suspend 1200.) then
2524+
if
2525+
not
2526+
( with_tracing ~task
2527+
~name:"VM_save_domain_suspend_callback_wait_shutdown"
2528+
@@ fun () -> wait_shutdown task vm Suspend 1200.
2529+
)
2530+
then
25112531
raise (Xenopsd_error (Failed_to_suspend (vm.Vm.id, 1200.)))
25122532
) ;
25132533
(* Record the final memory usage of the domain so we know how much

0 commit comments

Comments
 (0)