Skip to content

Commit 737d00d

Browse files
authored
Merge pull request #52 from sysprog21/harden
Harden guest exit, reset, and disk I/O paths
2 parents 606e9ca + 59530e7 commit 737d00d

9 files changed

Lines changed: 115 additions & 16 deletions

File tree

src/arch/arm64/desc.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
#pragma once
22

33
#define RAM_BASE (1UL << 31)
4+
5+
/* GIC SPIs (offset by ARM_GIC_SPI_BASE inside vm_irq_line). Distinct lines per
6+
* device so a level-triggered ARM GIC can de-assert per source rather than
7+
* sharing a vector across virtio paths.
8+
*/
49
#define SERIAL_IRQ 0
510
#define VIRTIO_BLK_IRQ 1
611
#define VIRTIO_NET_IRQ 2
7-
#define KERNEL_OPTS "console=ttyS0"
12+
13+
/* panic=-1 reboots immediately on guest panic. arm64 has no keyboard reset
14+
* path; the kernel issues a PSCI SYSTEM_RESET / SYSTEM_OFF, which KVM
15+
* surfaces as KVM_EXIT_SYSTEM_EVENT and vm_run() handles as a clean exit.
16+
*/
17+
#define KERNEL_OPTS "console=ttyS0 panic=-1"

src/arch/arm64/vm.c

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ int vm_arch_cpu_init(vm_t *v)
9292
if (ioctl(v->vm_fd, KVM_ARM_PREFERRED_TARGET, &vcpu_init) < 0)
9393
return throw_err("Failed to find perferred CPU type\n");
9494

95+
/* Enable in-kernel PSCI 0.2 emulation. Without this, a guest panic with
96+
* panic=-1 issues SYSTEM_OFF and KVM either ignores the SMC/HVC (so the
97+
* guest spins) or signals an undefined-instruction trap. With PSCI on,
98+
* the call surfaces as KVM_EXIT_SYSTEM_EVENT to the host loop.
99+
*/
100+
vcpu_init.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
101+
95102
if (ioctl(v->vcpu_fd, KVM_ARM_VCPU_INIT, &vcpu_init))
96103
return throw_err("Failed to initialize vCPU\n");
97104

@@ -142,9 +149,9 @@ int vm_arch_init_platform_device(vm_t *v)
142149
if (serial_init(&v->serial, &v->io_bus))
143150
return throw_err("Failed to init UART device");
144151

145-
/* Zero virtio_blk_dev so pci_dev_is_registered() observes a clean
146-
* state when the user boots without -d. virtio_net_init memsets
147-
* inside vm_enable_net, so virtio_net_dev is covered by that path.
152+
/* Zero virtio_blk_dev so pci_dev_is_registered() observes a clean state
153+
* when the user boots without -d. virtio_net_init memsets inside
154+
* vm_enable_net, so virtio_net_dev is covered by that path.
148155
* x86 already does the same call in its vm_arch_init_platform_device.
149156
*/
150157
virtio_blk_init(&v->virtio_blk_dev);
@@ -347,6 +354,17 @@ static int generate_fdt(vm_t *v)
347354
__FDT(property_cell, "phandle", FDT_PHANDLE_GIC);
348355
__FDT(end_node);
349356

357+
/* /psci node: lets the guest discover the in-kernel PSCI 0.2 emulator
358+
* we requested in vm_arch_cpu_init via KVM_ARM_VCPU_PSCI_0_2. KVM uses
359+
* HVC as the conduit on the virtual CPU. Without this node the kernel
360+
* doesn't know the firmware interface exists and falls back to a
361+
* spinloop on panic.
362+
*/
363+
__FDT(begin_node, "psci");
364+
__FDT(property_string, "compatible", "arm,psci-0.2");
365+
__FDT(property_string, "method", "hvc");
366+
__FDT(end_node);
367+
350368
/* /uart node: serial device */
351369
/* The node name of the serial device is different from kvmtool. */
352370
__FDT(begin_node, "uart");

src/arch/x86/desc.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
#pragma once
22

33
#define RAM_BASE 0
4+
5+
/* IO-APIC GSIs. Each device gets its own line so we never share a vector
6+
* between virtio devices, which keeps level-triggered ISA legacy IRQs (the
7+
* 16550 on IRQ4) out of the way of edge-triggered virtio MSI-less paths.
8+
*/
49
#define SERIAL_IRQ 4
510
#define VIRTIO_NET_IRQ 14
611
#define VIRTIO_BLK_IRQ 15
7-
#define KERNEL_OPTS "console=ttyS0 pci=conf1"
12+
13+
/* panic=-1 reboots immediately on guest panic; reboot=k uses the keyboard
14+
* controller path which on KVM ends in a triple-fault, surfacing cleanly as
15+
* KVM_EXIT_SHUTDOWN to the host loop in vm_run().
16+
*/
17+
#define KERNEL_OPTS "console=ttyS0 pci=conf1 panic=-1 reboot=k"

src/diskimg.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,18 @@ ssize_t diskimg_read(struct diskimg *diskimg,
99
off_t offset,
1010
size_t size)
1111
{
12-
lseek(diskimg->fd, offset, SEEK_SET);
13-
return read(diskimg->fd, data, size);
12+
/* pread/pwrite carry the offset in the syscall, so concurrent virtq
13+
* workers cannot race on a shared file pointer the way lseek+read does.
14+
*/
15+
return pread(diskimg->fd, data, size, offset);
1416
}
1517

1618
ssize_t diskimg_write(struct diskimg *diskimg,
1719
void *data,
1820
off_t offset,
1921
size_t size)
2022
{
21-
lseek(diskimg->fd, offset, SEEK_SET);
22-
return write(diskimg->fd, data, size);
23+
return pwrite(diskimg->fd, data, size, offset);
2324
}
2425

2526
int diskimg_flush(struct diskimg *diskimg)
@@ -33,7 +34,11 @@ int diskimg_init(struct diskimg *diskimg, const char *file_path)
3334
if (diskimg->fd < 0)
3435
return -1;
3536
struct stat st;
36-
fstat(diskimg->fd, &st);
37+
if (fstat(diskimg->fd, &st) < 0) {
38+
close(diskimg->fd);
39+
diskimg->fd = -1;
40+
return -1;
41+
}
3742
diskimg->size = st.st_size;
3843
return 0;
3944
}

src/main.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ int main(int argc, char *argv[])
7777
}
7878
}
7979

80-
set_input_mode();
81-
8280
vm_t vm;
8381
if (vm_init(&vm) < 0)
8482
return throw_err("Failed to initialize guest vm");
@@ -99,6 +97,11 @@ int main(int argc, char *argv[])
9997
if (vm_late_init(&vm) < 0)
10098
return -1;
10199

100+
/* Switch the terminal to raw mode only once setup has succeeded so that
101+
* any error from the load/init paths above is rendered on a normal tty.
102+
*/
103+
set_input_mode();
104+
102105
vm_run(&vm);
103106
vm_exit(&vm);
104107

src/virtio-blk.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,6 @@ static int virtio_blk_setup(struct virtio_blk_dev *dev, struct diskimg *diskimg)
275275
}
276276

277277
dev->enable = true;
278-
/* FIXME: irq_num should be different to other devs */
279278
dev->irq_num = VIRTIO_BLK_IRQ;
280279
dev->diskimg = diskimg;
281280
dev->config.capacity = diskimg->size >> 9;

src/virtio-pci.c

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,39 @@ static void virtio_pci_write_guest_feature(struct virtio_pci_dev *dev)
4646

4747
static void virtio_pci_reset(struct virtio_pci_dev *dev)
4848
{
49-
/* TODO: virtio pci reset */
49+
/* Virtio 1.x §2.4: writing 0 to device_status resets the device to its
50+
* initial post-power-on state. We clear the negotiated bits the guest
51+
* is about to re-write: acked features, ISR, the per-virtq packed-ring
52+
* indices, and the common-cfg selector bytes. This is sufficient for a
53+
* driver re-probe (reload, kexec) where the guest hasn't yet enabled
54+
* any virtqueue.
55+
*
56+
* We deliberately leave info.enable, desc_ring, and the device/driver
57+
* event pointers alone, because the device-emulator workers in
58+
* virtio-blk / virtio-net poll those without locking. A full reset that
59+
* tears down enabled queues would have to write the per-device stopfd
60+
* and pthread_join the workers, which the generic virtio-pci layer has
61+
* no handle on — leaving that for a follow-up that adds a
62+
* virtio_pci_ops::reset hook.
63+
*/
64+
dev->guest_feature = 0;
65+
dev->config.common_cfg.device_feature_select = 0;
66+
dev->config.common_cfg.guest_feature_select = 0;
67+
dev->config.common_cfg.guest_feature = 0;
68+
dev->config.common_cfg.queue_select = 0;
69+
__atomic_store_n(&dev->config.isr_cap.isr_status, 0, __ATOMIC_RELEASE);
70+
71+
for (uint16_t i = 0; i < dev->num_queues; i++) {
72+
struct virtq *vq = &dev->vq[i];
73+
if (vq->info.enable)
74+
continue;
75+
vq->info.size = VIRTQ_SIZE;
76+
vq->info.desc_addr = 0;
77+
vq->info.device_addr = 0;
78+
vq->info.driver_addr = 0;
79+
vq->next_avail_idx = 0;
80+
vq->used_wrap_count = 1;
81+
}
5082
}
5183

5284
static void virtio_pci_write_status(struct virtio_pci_dev *dev)
@@ -62,7 +94,7 @@ static void virtio_pci_select_virtq(struct virtio_pci_dev *dev)
6294
uint16_t select = dev->config.common_cfg.queue_select;
6395
struct virtio_pci_common_cfg *config = &dev->config.common_cfg;
6496

65-
if (select < config->num_queues) {
97+
if (select < dev->num_queues) {
6698
uint64_t offset = offsetof(struct virtio_pci_common_cfg, queue_size);
6799
memcpy((void *) ((uintptr_t) config + offset), &dev->vq[select].info,
68100
sizeof(struct virtq_info));
@@ -114,7 +146,7 @@ static void virtio_pci_space_write(struct virtio_pci_dev *dev,
114146
offset <= VIRTIO_PCI_COMMON_Q_USEDHI) {
115147
uint16_t select = dev->config.common_cfg.queue_select;
116148
uint64_t info_offset = offset - VIRTIO_PCI_COMMON_Q_SIZE;
117-
if (select < dev->config.common_cfg.num_queues) {
149+
if (select < dev->num_queues) {
118150
memcpy((void *) ((uintptr_t) &dev->vq[select].info +
119151
info_offset),
120152
data, size);
@@ -251,6 +283,7 @@ void virtio_pci_set_virtq(struct virtio_pci_dev *dev,
251283
struct virtq *vq,
252284
uint16_t num_queues)
253285
{
286+
dev->num_queues = num_queues;
254287
dev->config.common_cfg.num_queues = num_queues;
255288
dev->vq = vq;
256289
}

src/virtio-pci.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ struct virtio_pci_dev {
3535
struct virtio_pci_notify_cap *notify_cap;
3636
struct virtio_pci_cap *dev_cfg_cap;
3737
struct virtq *vq;
38+
/* Host-side mirror of the queue count. config.common_cfg.num_queues
39+
* lives in guest-writable BAR memory (the unconditional memcpy in
40+
* virtio_pci_space_write lets a guest overwrite it), so trusting it
41+
* to bound vq[] indexing is an OOB-write primitive. Bounds checks
42+
* use this field instead.
43+
*/
44+
uint16_t num_queues;
3845
};
3946

4047
uint64_t virtio_pci_get_notify_addr(struct virtio_pci_dev *dev,

src/vm.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,20 @@ int vm_run(vm_t *v)
161161
printf("shutdown\n");
162162
munmap(run, run_size);
163163
return 0;
164+
case KVM_EXIT_SYSTEM_EVENT: {
165+
/* arm64 PSCI SYSTEM_OFF / SYSTEM_RESET land here. SHUTDOWN and
166+
* RESET are clean exits from our POV — kvm-host has no reboot
167+
* loop, and a guest panic with panic=-1 reaches us as RESET
168+
* (indistinguishable from a userspace `reboot`), matching the
169+
* x86 reboot=k path that comes back as KVM_EXIT_SHUTDOWN.
170+
* CRASH is the one type that signals host-relevant failure
171+
* (NMI watchdog, kdump trigger), so propagate it as -1.
172+
*/
173+
uint32_t type = run->system_event.type;
174+
printf("system event %u\n", type);
175+
munmap(run, run_size);
176+
return type == KVM_SYSTEM_EVENT_CRASH ? -1 : 0;
177+
}
164178
default:
165179
printf("reason: %d\n", run->exit_reason);
166180
munmap(run, run_size);

0 commit comments

Comments
 (0)