Skip to content

Commit 567e8d9

Browse files
committed
Enable VirtIO block to access host OS /dev/ block devices
The user may not always have a disk image but might have a /dev/x block device, such as a USB drive, that they want to share with the guest OS, so allowing this type of virtio-blk source is intuitive. To support this, ioctl() is used to retrieve the actual size of the /dev/x block device. This implementation supports both Apple and Linux platforms. On Apple platforms, mmap() on block devices appears to be unsupported regardless of the flag combination used. To address this, a fallback mechanism is added and used when mmap() fails: it uses malloc() together with pread(), pwrite(), and fsync() on the block device fd to emulate the behavior of mmap(). The initial fallback was incomplete, as it only allocated heap memory without loading the block device's content into it; this commit resolves the issue by reading the device contents into the allocated buffer. Since the emulator may exit asynchronously, a new rv_fsync_device() function is introduced to ensure the block device is properly synchronized on such exits; it can also be invoked by other asynchronous exit paths. To fully support this fallback, disk_fd and disk_size are now stored in the vblk state during its initialization. Close #544
1 parent 885c378 commit 567e8d9

File tree

4 files changed

+119
-20
lines changed

4 files changed

+119
-20
lines changed

src/devices/virtio-blk.c

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,22 @@
55

66
#include <assert.h>
77
#include <fcntl.h>
8+
#include <libgen.h>
89
#include <stdbool.h>
910
#include <stdio.h>
1011
#include <stdlib.h>
1112
#include <string.h>
13+
#include <sys/ioctl.h>
1214
#include <sys/mman.h>
1315
#include <sys/stat.h>
1416
#include <unistd.h>
1517

18+
#if defined(__APPLE__)
19+
#include <sys/disk.h> /* DKIOCGETBLOCKCOUNT and DKIOCGETBLOCKSIZE */
20+
#else
21+
#include <linux/fs.h> /* BLKGETSIZE64 */
22+
#endif
23+
1624
#include "virtio.h"
1725

1826
#define DISK_BLK_SIZE 512
@@ -97,12 +105,16 @@ static void virtio_blk_update_status(virtio_blk_state_t *vblk, uint32_t status)
97105
uint32_t device_features = vblk->device_features;
98106
uint32_t *ram = vblk->ram;
99107
uint32_t *disk = vblk->disk;
108+
uint64_t disk_size = vblk->disk_size;
109+
int disk_fd = vblk->disk_fd;
100110
void *priv = vblk->priv;
101111
uint32_t capacity = VBLK_PRIV(vblk)->capacity;
102112
memset(vblk, 0, sizeof(*vblk));
103113
vblk->device_features = device_features;
104114
vblk->ram = ram;
105115
vblk->disk = disk;
116+
vblk->disk_size = disk_size;
117+
vblk->disk_fd = disk_fd;
106118
vblk->priv = priv;
107119
VBLK_PRIV(vblk)->capacity = capacity;
108120
}
@@ -388,6 +400,9 @@ uint32_t *virtio_blk_init(virtio_blk_state_t *vblk,
388400
exit(EXIT_FAILURE);
389401
}
390402

403+
/* For mmap_fallback */
404+
vblk->disk_fd = -1;
405+
391406
/* Allocate memory for the private member */
392407
vblk->priv = &vblk_configs[vblk_dev_cnt++];
393408

@@ -403,29 +418,74 @@ uint32_t *virtio_blk_init(virtio_blk_state_t *vblk,
403418
int disk_fd = open(disk_file, readonly ? O_RDONLY : O_RDWR);
404419
if (disk_fd < 0) {
405420
rv_log_error("Could not open %s", disk_file);
406-
exit(EXIT_FAILURE);
421+
goto fail;
407422
}
408423

409-
/* Get the disk image size */
410-
struct stat st;
411-
fstat(disk_fd, &st);
412-
VBLK_PRIV(vblk)->disk_size = st.st_size;
424+
const char *disk_file_dirname = dirname(disk_file);
425+
if (!disk_file_dirname) {
426+
rv_log_error("Fail dirname disk_file: %s", disk_file);
427+
goto disk_size_fail;
428+
}
429+
/* Get the disk size */
430+
uint64_t disk_size;
431+
if (strcmp(disk_file_dirname, "/dev") ==
432+
0) { /* from /dev/, leverage ioctl */
433+
#if defined(__APPLE__)
434+
uint32_t block_size;
435+
uint64_t block_count;
436+
if (ioctl(disk_fd, DKIOCGETBLOCKCOUNT, &block_count) == -1) {
437+
rv_log_error("DKIOCGETBLOCKCOUNT failed");
438+
goto disk_size_fail;
439+
}
440+
if (ioctl(disk_fd, DKIOCGETBLOCKSIZE, &block_size) == -1) {
441+
rv_log_error("DKIOCGETBLOCKSIZE failed");
442+
goto disk_size_fail;
443+
}
444+
disk_size = block_count * block_size;
445+
#else /* Linux */
446+
if (ioctl(disk_fd, BLKGETSIZE64, &disk_size) == -1) {
447+
rv_log_error("BLKGETSIZE64 failed");
448+
goto disk_size_fail;
449+
}
450+
#endif
451+
} else { /* other path, stat it as normal file */
452+
struct stat st;
453+
if (fstat(disk_fd, &st) == -1) {
454+
rv_log_error("fstat failed");
455+
goto disk_size_fail;
456+
}
457+
disk_size = st.st_size;
458+
}
459+
VBLK_PRIV(vblk)->disk_size = disk_size;
413460

414461
/* Set up the disk memory */
415462
uint32_t *disk_mem;
416-
#if HAVE_MMAP
417463
disk_mem = mmap(NULL, VBLK_PRIV(vblk)->disk_size,
418464
readonly ? PROT_READ : (PROT_READ | PROT_WRITE), MAP_SHARED,
419465
disk_fd, 0);
420-
if (disk_mem == MAP_FAILED)
421-
goto err;
422-
#else
466+
if (disk_mem == MAP_FAILED) {
467+
rv_log_trace(
468+
"Fallback to malloc block device due to mmap failed or the "
469+
"operation is unsupported");
470+
goto mmap_fallback;
471+
}
472+
/*
473+
* disk_fd should be closed on exit after flushing heap data back to the
474+
* device when using mmap_fallback.
475+
*/
476+
close(disk_fd);
477+
goto disk_mem_ok;
478+
479+
mmap_fallback:
423480
disk_mem = malloc(VBLK_PRIV(vblk)->disk_size);
424481
if (!disk_mem)
425-
goto err;
426-
#endif
482+
goto disk_mem_err;
483+
vblk->disk_fd = disk_fd;
484+
vblk->disk_size = disk_size;
485+
pread(disk_fd, disk_mem, disk_size, 0);
486+
487+
disk_mem_ok:
427488
assert(!(((uintptr_t) disk_mem) & 0b11));
428-
close(disk_fd);
429489

430490
vblk->disk = disk_mem;
431491
VBLK_PRIV(vblk)->capacity =
@@ -436,9 +496,14 @@ uint32_t *virtio_blk_init(virtio_blk_state_t *vblk,
436496

437497
return disk_mem;
438498

439-
err:
499+
disk_mem_err:
440500
rv_log_error("Could not map disk %s", disk_file);
441-
return NULL;
501+
502+
disk_size_fail:
503+
close(disk_fd);
504+
505+
fail:
506+
exit(EXIT_FAILURE);
442507
}
443508

444509
virtio_blk_state_t *vblk_new()
@@ -450,10 +515,9 @@ virtio_blk_state_t *vblk_new()
450515

451516
void vblk_delete(virtio_blk_state_t *vblk)
452517
{
453-
#if HAVE_MMAP
454-
munmap(vblk->disk, VBLK_PRIV(vblk)->disk_size);
455-
#else
456-
free(vblk->disk);
457-
#endif
518+
if (vblk->disk_fd == -1)
519+
munmap(vblk->disk, VBLK_PRIV(vblk)->disk_size);
520+
else
521+
free(vblk->disk);
458522
free(vblk);
459523
}

src/devices/virtio.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ typedef struct {
103103
/* supplied by environment */
104104
uint32_t *ram;
105105
uint32_t *disk;
106+
uint64_t disk_size;
107+
int disk_fd;
106108
/* implementation-specific */
107109
void *priv;
108110
} virtio_blk_state_t;

src/main.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,11 @@ int main(int argc, char **args)
298298

299299
/* finalize the RISC-V runtime */
300300
rv_delete(rv);
301+
/*
302+
* Other translation units cannot update this pointer, so reset it here
303+
* to prevent the atexit() callbacks from being invoked multiple times.
304+
*/
305+
rv = NULL;
301306
rv_log_info("RISC-V emulator is destroyed");
302307

303308
end:

src/riscv.c

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,29 @@ static void rv_async_block_clear()
376376
return;
377377
#endif /* !RV32_HAS(JIT) */
378378
}
379-
#endif
379+
380+
static void rv_fsync_device()
381+
{
382+
if (!rv)
383+
return;
384+
385+
/* mmap_fallback, need to write and sync the device */
386+
vm_attr_t *attr = PRIV(rv);
387+
if (attr->vblk && attr->vblk->disk_fd >= 3) {
388+
if (pwrite(attr->vblk->disk_fd, attr->vblk->disk, attr->vblk->disk_size,
389+
0) == -1) {
390+
rv_log_error("pwrite block device failed");
391+
return;
392+
}
393+
if (fsync(attr->vblk->disk_fd) == -1) {
394+
rv_log_error("fsync block device failed");
395+
return;
396+
}
397+
close(attr->vblk->disk_fd);
398+
rv_log_info("Sync block devices OK");
399+
}
400+
}
401+
#endif /* RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) */
380402

381403
riscv_t *rv_create(riscv_user_t rv_attr)
382404
{
@@ -388,6 +410,8 @@ riscv_t *rv_create(riscv_user_t rv_attr)
388410
#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER)
389411
/* register cleaning callback for CTRL+a+x exit */
390412
atexit(rv_async_block_clear);
413+
/* register device sync callback for CTRL+a+x exit */
414+
atexit(rv_fsync_device);
391415
#endif
392416

393417
/* copy over the attr */
@@ -687,6 +711,10 @@ void rv_delete(riscv_t *rv)
687711
#if !RV32_HAS(JIT) || (RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER))
688712
vm_attr_t *attr = PRIV(rv);
689713
#endif
714+
715+
/* sync device before cleaning up */
716+
rv_fsync_device();
717+
690718
#if !RV32_HAS(JIT)
691719
map_delete(attr->fd_map);
692720
memory_delete(attr->mem);

0 commit comments

Comments
 (0)