diff --git a/cmd/qvm/rebuild.go b/cmd/qvm/rebuild.go
index a96ac6f..7366f74 100644
--- a/cmd/qvm/rebuild.go
+++ b/cmd/qvm/rebuild.go
@@ -144,6 +144,15 @@ use the new image.`,
 		logging.Info("Base disk image built successfully")
 
+		// Delete the overlay since it was backed by the old base image
+		// A stale overlay causes GRUB/boot failures because disk sectors no longer match
+		if _, err := os.Stat(config.Overlay); err == nil {
+			logging.Info("Removing old overlay (incompatible with new base image)...")
+			if err := os.Remove(config.Overlay); err != nil {
+				logging.Warn(fmt.Sprintf("Failed to remove overlay: %v", err))
+			}
+		}
+
 		logging.Info("Building VM runner script from flake...")
 
 		vmRunnerLink := filepath.Join(config.StateDir, "vm-runner-result")
 
@@ -166,11 +175,10 @@ use the new image.`,
 		logging.Info("Rebuild complete!")
 
 		if vm.IsRunning() {
-			logging.Warn("VM is currently running")
+			logging.Warn("VM is currently running with the old image")
 			fmt.Println("")
-			fmt.Println("The new base image will only take effect after restarting the VM:")
-			fmt.Println(" qvm stop")
-			fmt.Println(" qvm start")
+			fmt.Println("Restart the VM to use the new base image:")
+			fmt.Println(" qvm stop && qvm start")
 			fmt.Println("")
 		}
 	},
diff --git a/internal/qmp/client.go b/internal/qmp/client.go
index 1e687e5..b7287fd 100644
--- a/internal/qmp/client.go
+++ b/internal/qmp/client.go
@@ -94,12 +94,12 @@ func (c *Client) AddChardev(id, socketPath string) mo.Result[struct{}] {
 	return mo.Ok(struct{}{})
 }
 
-// AddVhostUserFsDevice adds a vhost-user-fs-pci device connected to a chardev.
-// This is the second step of hot-mounting a filesystem.
-func (c *Client) AddVhostUserFsDevice(chardevID, tag string) mo.Result[struct{}] {
+// AddVhostUserFsDevice adds a vhost-user-fs-pci device connected to a chardev,
+// targeting a hotplug-capable PCIe root port bus.
+func (c *Client) AddVhostUserFsDevice(chardevID, tag, bus string) mo.Result[struct{}] {
 	// Device ID must be unique, derive from chardev ID
 	deviceID := "dev_" + chardevID
-	cmd := fmt.Sprintf(`{"execute":"device_add","arguments":{"driver":"vhost-user-fs-pci","chardev":"%s","tag":"%s","id":"%s","queue-size":1024}}`, chardevID, tag, deviceID)
+	cmd := fmt.Sprintf(`{"execute":"device_add","arguments":{"driver":"vhost-user-fs-pci","chardev":"%s","tag":"%s","id":"%s","queue-size":1024,"bus":"%s"}}`, chardevID, tag, deviceID, bus)
 	raw, err := c.monitor.Run([]byte(cmd))
 	if err != nil {
 		return mo.Err[struct{}](fmt.Errorf("failed to add vhost-user-fs device for %s: %w", tag, err))
@@ -116,9 +116,67 @@ func (c *Client) AddVhostUserFsDevice(chardevID, tag string) mo.Result[struct{}]
 	return mo.Ok(struct{}{})
 }
 
+// FindFreeHotplugBus queries PCI devices and returns the first hotplug root port
+// that has no child devices attached. Returns an error if all slots are occupied.
+func (c *Client) FindFreeHotplugBus(busPrefix string, slotCount int) mo.Result[string] {
+	// Query all PCI devices to find which buses are occupied
+	cmd := []byte(`{"execute":"query-pci"}`)
+	raw, err := c.monitor.Run(cmd)
+	if err != nil {
+		return mo.Err[string](fmt.Errorf("failed to query PCI devices: %w", err))
+	}
+
+	// Parse to find which hotplug buses have devices
+	var resp struct {
+		Return []struct {
+			Devices []struct {
+				Bus int `json:"bus"`
+				QdevID string `json:"qdev_id"`
+				PCIBridge *struct {
+					Bus struct {
+						Number int `json:"number"`
+					} `json:"bus"`
+					Devices []json.RawMessage `json:"devices"`
+				} `json:"pci_bridge"`
+			} `json:"devices"`
+		} `json:"return"`
+	}
+	if err := json.Unmarshal(raw, &resp); err != nil {
+		return mo.Err[string](fmt.Errorf("failed to parse PCI response: %w", err))
+	}
+
+	// Build set of occupied hotplug buses
+	occupied := make(map[string]bool)
+	for _, bus := range resp.Return {
+		for _, dev := range bus.Devices {
+			if dev.PCIBridge != nil && len(dev.PCIBridge.Devices) > 0 {
+				occupied[dev.QdevID] = true
+			}
+		}
+	}
+
+	// Find first free hotplug slot
+	for i := 0; i < slotCount; i++ {
+		busID := fmt.Sprintf("%s%d", busPrefix, i)
+		if !occupied[busID] {
+			return mo.Ok(busID)
+		}
+	}
+
+	return mo.Err[string](fmt.Errorf("all %d hotplug slots are occupied", slotCount))
+}
+
 // HotMountFilesystem performs a complete hot-mount of a virtiofsd filesystem.
 // Requires the virtiofsd daemon to already be running and listening on socketPath.
-func (c *Client) HotMountFilesystem(tag, socketPath string) mo.Result[struct{}] {
+// Finds a free PCIe hotplug root port and attaches the device there.
+func (c *Client) HotMountFilesystem(tag, socketPath, busPrefix string, slotCount int) mo.Result[struct{}] {
+	// Find a free hotplug bus
+	busResult := c.FindFreeHotplugBus(busPrefix, slotCount)
+	if busResult.IsError() {
+		return mo.Err[struct{}](busResult.Error())
+	}
+	bus := busResult.MustGet()
+
 	// Use tag as chardev ID for simplicity
 	chardevID := tag
 
@@ -127,8 +185,8 @@ func (c *Client) HotMountFilesystem(tag, socketPath string) mo.Result[struct{}]
 		return result
 	}
 
-	// Step 2: Add device
-	if result := c.AddVhostUserFsDevice(chardevID, tag); result.IsError() {
+	// Step 2: Add device on the hotplug bus
+	if result := c.AddVhostUserFsDevice(chardevID, tag, bus); result.IsError() {
 		return result
 	}
 
diff --git a/internal/vm/lifecycle.go b/internal/vm/lifecycle.go
index 35b6fa9..5d7a92b 100644
--- a/internal/vm/lifecycle.go
+++ b/internal/vm/lifecycle.go
@@ -196,8 +196,8 @@ func HotMountWorkspace(ws *workspace.Workspace) mo.Result[struct{}] {
 	client := qmpResult.MustGet()
 	defer client.Close()
 
-	// Hot-mount the filesystem
-	hotMountResult := client.HotMountFilesystem(ws.MountTag, mount.SocketPath)
+	// Hot-mount the filesystem onto a free PCIe hotplug root port
+	hotMountResult := client.HotMountFilesystem(ws.MountTag, mount.SocketPath, HotplugBusPrefix, HotplugSlots)
 	if hotMountResult.IsError() {
 		vfsManager.StopMount(mount)
 		return mo.Err[struct{}](fmt.Errorf("failed to hot-mount filesystem: %w", hotMountResult.Error()))
diff --git a/internal/vm/qemu.go b/internal/vm/qemu.go
index 7ca282c..d4ce7e5 100644
--- a/internal/vm/qemu.go
+++ b/internal/vm/qemu.go
@@ -7,8 +7,18 @@ import (
 	"strconv"
 )
 
+// HotplugSlots is the number of PCIe root ports reserved for hot-plugging
+// workspace mounts into a running VM. Each hot-mounted virtiofs device needs
+// its own root port since pcie.0 does not support hotplug.
+const HotplugSlots = 16
+
+// HotplugBusPrefix is the bus ID prefix for hotplug-capable PCIe root ports.
+// Slots are named hotplug0, hotplug1, ..., hotplugN.
+const HotplugBusPrefix = "hotplug"
+
 // buildQEMUCommand builds the QEMU command line for virtiofsd-based mounts.
-// Uses vhost-user-fs-pci devices which support hot-plugging.
+// Uses vhost-user-fs-pci devices for boot-time mounts and provisions empty
+// PCIe root ports for hot-plugging additional mounts at runtime.
 func buildQEMUCommand(cfg *config.Config, sshPort int, mounts []virtiofsd.Mount) []string {
 	memSize := cfg.VM.Memory
 
@@ -31,7 +41,7 @@ func buildQEMUCommand(cfg *config.Config, sshPort int, mounts []virtiofsd.Mount)
 		"-qmp", fmt.Sprintf("unix:%s,server,nowait", config.QMPSocket),
 	}
 
-	// Add vhost-user-fs devices for each mount
+	// Add vhost-user-fs devices for each boot-time mount
 	for _, mount := range mounts {
 		args = append(args,
 			"-chardev", fmt.Sprintf("socket,id=%s,path=%s", mount.Tag, mount.SocketPath),
@@ -39,5 +49,15 @@ func buildQEMUCommand(cfg *config.Config, sshPort int, mounts []virtiofsd.Mount)
 		)
 	}
 
+	// Provision empty PCIe root ports for hot-plugging workspace mounts.
+	// The q35 root bus (pcie.0) does not support hotplug, so we need
+	// dedicated root ports that accept device_add at runtime.
+	for i := 0; i < HotplugSlots; i++ {
+		busID := fmt.Sprintf("%s%d", HotplugBusPrefix, i)
+		args = append(args,
+			"-device", fmt.Sprintf("pcie-root-port,id=%s,slot=%d", busID, i+1),
+		)
+	}
+
 	return args
 }