Skip to content

Commit e0b28c1

Browse files
committed
Avoid hanging qubesd on unresponsive domU shutdown
The action blocks either way as we have to await for them to complete, but the "xl" command does not block qubesd while "virsh" does, as it has a global lock. For: QubesOS/qubes-issues#10648 For: QubesOS/qubes-issues#10074
1 parent 3815893 commit e0b28c1

3 files changed

Lines changed: 123 additions & 6 deletions

File tree

qubes/tests/vm/mix/net.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# License along with this library; if not, see <https://www.gnu.org/licenses/>.
2121
#
2222
import ipaddress
23+
from unittest import mock
2324
from unittest.mock import patch
2425

2526
import qubes
@@ -395,7 +396,18 @@ def test_180_shutdown(self, mock_halted, mock_shutdown):
395396
with self.assertRaises(qubes.exc.QubesVMInUseError):
396397
self.loop.run_until_complete(vm.netvm.shutdown())
397398
vm.is_preload = True
398-
self.loop.run_until_complete(vm.netvm.shutdown())
399+
mock_proc = mock.AsyncMock()
400+
mock_proc.communicate.return_value = (b"", None)
401+
mock_proc.wait.return_value = 0
402+
mock_proc.returncode = 0
403+
with (
404+
patch.object(vm.app, "vmm") as mock_vmm,
405+
patch(
406+
"asyncio.create_subprocess_exec", return_value=mock_proc
407+
) as mock_async_sub,
408+
):
409+
mock_vmm.libvirt_conn.getType.return_value = "Xen"
410+
self.loop.run_until_complete(vm.netvm.shutdown())
399411

400412
def test_200_vmid_to_ipv4(self):
401413
testcases = (

qubes/tests/vm/qubesvm.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2950,6 +2950,74 @@ def test_626_qdb_keyboard_layout_change(
29502950
async def coroutine_mock(self, mock, *args, **kwargs):
29512951
return mock(*args, **kwargs)
29522952

2953+
@unittest.mock.patch("asyncio.create_subprocess_exec")
2954+
@unittest.mock.patch(
2955+
"qubes.vm.qubesvm.QubesVM.is_running", return_value=True
2956+
)
2957+
@unittest.mock.patch("qubes.vm.qubesvm.QubesVM.libvirt_domain")
2958+
@unittest.mock.patch(
2959+
"qubes.vm.qubesvm.QubesVM.is_halted", return_value=False
2960+
)
2961+
def test_650_shutdown(
2962+
self, mock_halted, mock_shutdown, mock_running, mock_async_sub
2963+
):
2964+
# pylint: disable=unused-argument
2965+
vm = self.get_vm()
2966+
mock_proc = unittest.mock.AsyncMock()
2967+
mock_proc.communicate.return_value = (b"", None)
2968+
mock_proc.returncode = 0
2969+
mock_proc.wait.return_value = mock_proc.returncode
2970+
mock_async_sub.return_value = mock_proc
2971+
with unittest.mock.patch.object(vm.app, "vmm") as mock_vmm:
2972+
with self.subTest("xen"):
2973+
mock_vmm.libvirt_conn.getType.return_value = "Xen"
2974+
self.loop.run_until_complete(vm.shutdown())
2975+
mock_async_sub.assert_called_once_with(
2976+
"xl",
2977+
"shutdown",
2978+
"-F",
2979+
vm.name,
2980+
stdin=unittest.mock.ANY,
2981+
stdout=unittest.mock.ANY,
2982+
stderr=unittest.mock.ANY,
2983+
)
2984+
2985+
mock_async_sub.reset_mock()
2986+
with self.subTest("xen-fail"):
2987+
mock_proc.communicate.return_value = (b"Oh no (xen)", None)
2988+
mock_proc.returncode = 1
2989+
mock_vmm.libvirt_conn.getType.return_value = "Xen"
2990+
with self.assertRaises(qubes.exc.QubesVMShutdownTimeoutError):
2991+
self.loop.run_until_complete(vm.shutdown())
2992+
2993+
mock_async_sub.reset_mock()
2994+
with self.subTest("not-xen"):
2995+
mock_proc.returncode = 0
2996+
uri = "kvm:///"
2997+
mock_vmm.libvirt_conn.getType.return_value = "NotXen"
2998+
mock_vmm.libvirt_conn.getURI.return_value = uri
2999+
self.loop.run_until_complete(vm.shutdown())
3000+
mock_async_sub.assert_called_once_with(
3001+
"virsh",
3002+
"-c",
3003+
uri,
3004+
"shutdown",
3005+
vm.name,
3006+
stdin=unittest.mock.ANY,
3007+
stdout=unittest.mock.ANY,
3008+
stderr=unittest.mock.ANY,
3009+
)
3010+
3011+
mock_async_sub.reset_mock()
3012+
with self.subTest("not-xen-fail"):
3013+
mock_proc.communicate.return_value = (b"Oh no (not-xen)", None)
3014+
mock_proc.returncode = 1
3015+
uri = "kvm:///"
3016+
mock_vmm.libvirt_conn.getType.return_value = "NotXen"
3017+
mock_vmm.libvirt_conn.getURI.return_value = uri
3018+
with self.assertRaises(qubes.exc.QubesVMShutdownTimeoutError):
3019+
self.loop.run_until_complete(vm.shutdown())
3020+
29533021
@unittest.mock.patch("asyncio.create_subprocess_exec")
29543022
def test_700_run_service(self, mock_subprocess):
29553023
start_mock = unittest.mock.AsyncMock()

qubes/vm/qubesvm.py

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# pylint; disable=too-many-lines
12
#
23
# The Qubes OS Project, https://www.qubes-os.org/
34
#
@@ -1661,14 +1662,50 @@ async def shutdown(self, force=False, wait=False, timeout=None):
16611662
if self.is_paused():
16621663
self.libvirt_domain.destroy()
16631664
else:
1665+
action = "shutdown"
1666+
timeout_exc = qubes.exc.QubesVMShutdownTimeoutError(self)
1667+
# Some libvirt actions have a global lock on a domain, blocking
1668+
# a lot of operations and even qubesd. When possible to act
1669+
# without it, do so to avoid the whole qubesd hanging.
1670+
if self.app.vmm.libvirt_conn.getType() == "Xen":
1671+
command = ["xl", action, "-F", self.name]
1672+
else:
1673+
# or: self.libvirt_domain.shutdown() without subprocess
1674+
uri = self.app.vmm.libvirt_conn.getURI()
1675+
command = ["virsh", "-c", uri, action, self.name]
16641676
try:
1665-
self.libvirt_domain.shutdown()
1677+
proc = await asyncio.create_subprocess_exec(
1678+
*command,
1679+
stdin=asyncio.subprocess.DEVNULL,
1680+
stdout=asyncio.subprocess.PIPE,
1681+
stderr=asyncio.subprocess.STDOUT,
1682+
)
1683+
stdout, _ = await proc.communicate()
1684+
await proc.wait()
1685+
if proc.returncode:
1686+
raise subprocess.CalledProcessError(
1687+
proc.returncode,
1688+
command,
1689+
output=stdout,
1690+
)
1691+
except subprocess.CalledProcessError as e:
1692+
self.log.error(
1693+
"Attempted {!s} with subprocess but exited with "
1694+
"error code: {!s}: {!r}".format(
1695+
action,
1696+
e.returncode,
1697+
qubes.utils.sanitize_stderr_for_log(e.output),
1698+
)
1699+
)
1700+
raise timeout_exc
16661701
except libvirt.libvirtError as e:
1667-
if e.get_error_code() == libvirt.VIR_ERR_INTERNAL_ERROR:
1668-
raise qubes.exc.QubesVMShutdownTimeoutError(self)
1669-
self.log.exception(
1670-
"libvirt error code: {!r}".format(e.get_error_code())
1702+
exit_code = e.get_error_code()
1703+
self.log.error(
1704+
"Attempted {!s} with libvirt but exited with error "
1705+
"code: {!s}".format(action, exit_code)
16711706
)
1707+
if exit_code == libvirt.VIR_ERR_INTERNAL_ERROR:
1708+
raise timeout_exc
16721709
raise
16731710

16741711
if wait:

0 commit comments

Comments
 (0)