Add qvm doctor command to diagnose and fix common issues

Joshua Bell 2026-01-27 11:54:26 -06:00
parent 2aec01b3b2
commit eb469f1cd8
8 changed files with 660 additions and 170 deletions

1
.envrc Normal file
View file

@@ -0,0 +1 @@
use flake

280
cmd/qvm/doctor.go Normal file
View file

@@ -0,0 +1,280 @@
package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"qvm/internal/config"
"qvm/internal/vm"
"qvm/internal/workspace"
"strconv"
"strings"
"github.com/spf13/cobra"
)
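// Typical invocations (sketch):
//
//	qvm doctor        # report issues and suggested fixes
//	qvm doctor --fix  # additionally clean up stale state files while the VM is stopped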
var doctorCmd = &cobra.Command{
Use: "doctor",
Short: "Diagnose and fix common QVM issues",
Long: `Runs diagnostic checks on QVM configuration and state.
Checks for:
- KVM availability and permissions
- Required binaries (qemu-system-x86_64, virtiofsd, sshpass, nc)
- Base image presence
- Stale state files (orphaned PID files, sockets)
- VM process state consistency
If issues are found, provides remediation steps.`,
Run: func(cmd *cobra.Command, args []string) {
var issues []string
var fixes []string
fmt.Println("QVM Doctor - Diagnosing issues...")
fmt.Println()
// Check 1: KVM availability
fmt.Print("Checking KVM availability... ")
if _, err := os.Stat("/dev/kvm"); err != nil {
fmt.Println("FAIL")
issues = append(issues, "KVM not available (/dev/kvm not found)")
fixes = append(fixes, "Ensure KVM is enabled in BIOS and kvm module is loaded: modprobe kvm_intel (or kvm_amd)")
} else {
// Check KVM permissions
f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0)
if err != nil {
fmt.Println("FAIL")
issues = append(issues, fmt.Sprintf("Cannot access /dev/kvm: %v", err))
fixes = append(fixes, "Add your user to the kvm group: sudo usermod -aG kvm $USER (then logout/login)")
} else {
f.Close()
fmt.Println("OK")
}
}
// Check 2: Required binaries
requiredBinaries := []string{
"qemu-system-x86_64",
"virtiofsd",
"sshpass",
"nc",
"qemu-img",
}
for _, bin := range requiredBinaries {
fmt.Printf("Checking for %s... ", bin)
if _, err := exec.LookPath(bin); err != nil {
fmt.Println("MISSING")
issues = append(issues, fmt.Sprintf("Required binary not found: %s", bin))
fixes = append(fixes, fmt.Sprintf("Install %s via your package manager or nix", bin))
} else {
fmt.Println("OK")
}
}
// Check 3: Base image
fmt.Print("Checking base image... ")
if _, err := os.Stat(config.BaseImage); err != nil {
fmt.Println("MISSING")
issues = append(issues, "Base image not found at "+config.BaseImage)
fixes = append(fixes, "Run 'qvm rebuild' to build the base image")
} else {
fmt.Println("OK")
}
// Check 4: State file consistency
fmt.Print("Checking state files... ")
stateIssues := checkStateFiles()
if len(stateIssues) > 0 {
fmt.Println("ISSUES FOUND")
issues = append(issues, stateIssues...)
fixes = append(fixes, "Run 'qvm doctor --fix' to clean up stale state files")
} else {
fmt.Println("OK")
}
// Check 5: virtiofsd sockets
fmt.Print("Checking virtiofsd sockets... ")
socketIssues := checkVirtiofsdSockets()
if len(socketIssues) > 0 {
fmt.Println("ISSUES FOUND")
issues = append(issues, socketIssues...)
fixes = append(fixes, "Stale sockets will be cleaned up on next start")
} else {
fmt.Println("OK")
}
// Check 6: Workspace registry
fmt.Print("Checking workspace registry... ")
regResult := workspace.Load(config.WorkspacesFile)
if regResult.IsError() {
fmt.Println("ERROR")
issues = append(issues, fmt.Sprintf("Cannot load workspace registry: %v", regResult.Error()))
fixes = append(fixes, "The registry will be recreated on next 'qvm run'")
} else {
reg := regResult.MustGet()
workspaces := reg.List()
invalidCount := 0
for _, ws := range workspaces {
if _, err := os.Stat(ws.HostPath); err != nil {
invalidCount++
}
}
if invalidCount > 0 {
fmt.Printf("WARNING (%d workspaces with missing host paths)\n", invalidCount)
} else {
fmt.Printf("OK (%d workspaces registered)\n", len(workspaces))
}
}
fmt.Println()
// Summary
if len(issues) == 0 {
fmt.Println("All checks passed!")
} else {
fmt.Printf("Found %d issue(s):\n", len(issues))
fmt.Println()
for i, issue := range issues {
fmt.Printf(" %d. %s\n", i+1, issue)
if i < len(fixes) {
fmt.Printf(" Fix: %s\n", fixes[i])
}
}
fmt.Println()
fixFlag, _ := cmd.Flags().GetBool("fix")
if fixFlag {
fmt.Println("Applying automatic fixes...")
applyFixes()
} else {
fmt.Println("Run 'qvm doctor --fix' to apply automatic fixes where possible.")
}
}
},
}
func init() {
doctorCmd.Flags().Bool("fix", false, "Attempt to automatically fix issues")
}
// checkStateFiles verifies that PID files correspond to running processes
func checkStateFiles() []string {
var issues []string
// Check if PID file exists but process doesn't
pidBytes, err := os.ReadFile(config.PIDFile)
if err == nil {
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
if err == nil {
if !vm.IsRunning() {
issues = append(issues, fmt.Sprintf("Stale PID file: %s (process %d not running)", config.PIDFile, pid))
}
} else {
issues = append(issues, fmt.Sprintf("Invalid PID file: %s", config.PIDFile))
}
}
// Check for orphaned QMP socket
if _, err := os.Stat(config.QMPSocket); err == nil && !vm.IsRunning() {
issues = append(issues, fmt.Sprintf("Orphaned QMP socket: %s", config.QMPSocket))
}
// Check for orphaned SSH port file
if _, err := os.Stat(config.SSHPortFile); err == nil && !vm.IsRunning() {
issues = append(issues, fmt.Sprintf("Orphaned SSH port file: %s", config.SSHPortFile))
}
return issues
}
// checkVirtiofsdSockets looks for orphaned virtiofsd sockets
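// Assumes each mount's virtiofsd daemon writes <tag>.sock and virtiofsd-<tag>.pid
// under config.StateDir; sockets that do not follow this layout are reported as orphaned.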
func checkVirtiofsdSockets() []string {
var issues []string
pattern := filepath.Join(config.StateDir, "*.sock")
sockets, err := filepath.Glob(pattern)
if err != nil {
return issues
}
for _, sock := range sockets {
// Skip QMP socket
if sock == config.QMPSocket {
continue
}
// Check if corresponding virtiofsd is running
baseName := strings.TrimSuffix(filepath.Base(sock), ".sock")
pidFile := filepath.Join(config.StateDir, fmt.Sprintf("virtiofsd-%s.pid", baseName))
if _, err := os.Stat(pidFile); os.IsNotExist(err) {
issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (no PID file)", sock))
continue
}
pidBytes, err := os.ReadFile(pidFile)
if err != nil {
continue
}
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
if err != nil {
issues = append(issues, fmt.Sprintf("Invalid virtiofsd PID file: %s", pidFile))
continue
}
// Check if process is running
process, err := os.FindProcess(pid)
if err != nil {
issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (process lookup failed)", sock))
continue
}
// Signal 0 checks for process existence without actually delivering a signal
if err := process.Signal(syscall.Signal(0)); err != nil {
issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (process %d not running)", sock, pid))
}
}
return issues
}
// applyFixes attempts to clean up stale state files
func applyFixes() {
if !vm.IsRunning() {
// Clean up all state files
fmt.Println(" Cleaning up stale state files...")
if err := os.Remove(config.PIDFile); err == nil {
fmt.Printf(" Removed: %s\n", config.PIDFile)
}
if err := os.Remove(config.SSHPortFile); err == nil {
fmt.Printf(" Removed: %s\n", config.SSHPortFile)
}
if err := os.Remove(config.QMPSocket); err == nil {
fmt.Printf(" Removed: %s\n", config.QMPSocket)
}
// Clean up virtiofsd sockets and PID files
pattern := filepath.Join(config.StateDir, "*.sock")
sockets, _ := filepath.Glob(pattern)
for _, sock := range sockets {
if err := os.Remove(sock); err == nil {
fmt.Printf(" Removed: %s\n", sock)
}
}
pattern = filepath.Join(config.StateDir, "virtiofsd-*.pid")
pidFiles, _ := filepath.Glob(pattern)
for _, pidFile := range pidFiles {
if err := os.Remove(pidFile); err == nil {
fmt.Printf(" Removed: %s\n", pidFile)
}
}
fmt.Println(" Done!")
} else {
fmt.Println(" VM is running, cannot clean up state files. Stop VM first with 'qvm stop'.")
}
}

View file

@@ -28,6 +28,7 @@ func init() {
rootCmd.AddCommand(rebuildCmd)
rootCmd.AddCommand(resetCmd)
rootCmd.AddCommand(cleanCmd)
+rootCmd.AddCommand(doctorCmd)
}
func main() {

View file

@@ -23,7 +23,10 @@ var runCmd = &cobra.Command{
The current directory is automatically registered as a workspace
and mounted into the VM. The command runs in the mounted workspace.
-If no command is provided, starts an interactive zsh shell.`,
+If no command is provided, starts an interactive zsh shell.
+Unlike previous versions, new workspaces can be hot-mounted into
+a running VM without requiring a restart.`,
Args: cobra.ArbitraryArgs,
Run: func(cmd *cobra.Command, args []string) {
// Default to zsh if no command provided
@@ -72,32 +75,24 @@ If no command is provided, starts an interactive zsh shell.`,
os.Exit(1)
}
} else {
-statusResult := vm.Status()
-if statusResult.IsError() {
-logging.Error(statusResult.Error().Error())
-os.Exit(1)
-}
-status := statusResult.MustGet()
-checkCmd := exec.Command("sshpass", "-p", "root", "ssh",
-"-o", "StrictHostKeyChecking=no",
-"-o", "UserKnownHostsFile=/dev/null",
-"-o", "LogLevel=ERROR",
-"-p", strconv.Itoa(status.SSHPort),
-"root@localhost",
-fmt.Sprintf("test -d /sys/bus/virtio/drivers/9pnet_virtio/*/mount_tag && grep -q 'ws_%s' /sys/bus/virtio/drivers/9pnet_virtio/*/mount_tag 2>/dev/null", ws.Hash))
-if checkCmd.Run() != nil {
-logging.Info("Workspace not available in running VM, restarting VM...")
-stopResult := vm.Stop()
-if stopResult.IsError() {
-logging.Error(stopResult.Error().Error())
-os.Exit(1)
-}
-startResult := vm.Start(cfg, reg)
-if startResult.IsError() {
-logging.Error(startResult.Error().Error())
-os.Exit(1)
-}
+// Check if workspace is mounted, hot-mount if not
+if !vm.IsWorkspaceMounted(ws) {
+logging.Info(fmt.Sprintf("Hot-mounting workspace %s...", ws.Hash))
+hotMountResult := vm.HotMountWorkspace(ws)
+if hotMountResult.IsError() {
+logging.Error(fmt.Sprintf("Failed to hot-mount workspace: %v", hotMountResult.Error()))
+logging.Info("Falling back to VM restart...")
+stopResult := vm.Stop()
+if stopResult.IsError() {
+logging.Error(stopResult.Error().Error())
+os.Exit(1)
+}
+startResult := vm.Start(cfg, reg)
+if startResult.IsError() {
+logging.Error(startResult.Error().Error())
+os.Exit(1)
+}
+}
}
}
@@ -109,8 +104,11 @@ If no command is provided, starts an interactive zsh shell.`,
}
status := statusResult.MustGet()
-remoteCmd := fmt.Sprintf("mkdir -p '%s' && (mountpoint -q '%s' || mount -t 9p ws_%s '%s' -o trans=virtio,version=9p2000.L,msize=104857600) && cd '%s' && %s",
-ws.GuestPath, ws.GuestPath, ws.Hash, ws.GuestPath, ws.GuestPath, strings.Join(args, " "))
+// Use virtiofs mount instead of 9p
+// virtiofs mounts use the tag name directly
+remoteCmd := fmt.Sprintf(
+"mkdir -p '%s' && (mountpoint -q '%s' || mount -t virtiofs %s '%s') && cd '%s' && %s",
+ws.GuestPath, ws.GuestPath, ws.MountTag, ws.GuestPath, ws.GuestPath, strings.Join(args, " "))
sshArgs := []string{
"-p", "root",

78
internal/lock/lock.go Normal file
View file

@@ -0,0 +1,78 @@
// Package lock provides file-based locking for VM operations.
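//
// Typical usage (sketch, mirroring vm.Start):
//
//	lockResult := lock.Acquire(30 * time.Second)
//	if lockResult.IsError() {
//		return mo.Err[struct{}](lockResult.Error())
//	}
//	vmLock := lockResult.MustGet()
//	defer vmLock.Release()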
package lock
import (
"fmt"
"os"
"path/filepath"
"syscall"
"time"
"github.com/samber/mo"
"qvm/internal/config"
)
// Lock represents an exclusive file lock on VM operations.
type Lock struct {
file *os.File
}
var lockPath = filepath.Join(config.StateDir, "vm.lock")
// Acquire obtains an exclusive lock on VM operations.
// Blocks until the lock is available or timeout is reached.
func Acquire(timeout time.Duration) mo.Result[*Lock] {
if err := os.MkdirAll(config.StateDir, 0755); err != nil {
return mo.Err[*Lock](fmt.Errorf("failed to create state directory: %w", err))
}
file, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0644)
if err != nil {
return mo.Err[*Lock](fmt.Errorf("failed to open lock file: %w", err))
}
deadline := time.Now().Add(timeout)
for {
err := syscall.Flock(int(file.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
if err == nil {
return mo.Ok(&Lock{file: file})
}
if time.Now().After(deadline) {
file.Close()
return mo.Err[*Lock](fmt.Errorf("timeout waiting for VM lock after %v", timeout))
}
time.Sleep(100 * time.Millisecond)
}
}
// TryAcquire attempts to obtain an exclusive lock without blocking.
// Returns error if lock is held by another process.
func TryAcquire() mo.Result[*Lock] {
if err := os.MkdirAll(config.StateDir, 0755); err != nil {
return mo.Err[*Lock](fmt.Errorf("failed to create state directory: %w", err))
}
file, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0644)
if err != nil {
return mo.Err[*Lock](fmt.Errorf("failed to open lock file: %w", err))
}
err = syscall.Flock(int(file.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
if err != nil {
file.Close()
return mo.Err[*Lock](fmt.Errorf("VM operation in progress by another process"))
}
return mo.Ok(&Lock{file: file})
}
// Release releases the lock.
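// The kernel also drops flock locks automatically when the holding process exits,
// so a crashed qvm invocation cannot leave the lock held.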
func (l *Lock) Release() error {
if l.file == nil {
return nil
}
syscall.Flock(int(l.file.Fd()), syscall.LOCK_UN)
return l.file.Close()
}

View file

@@ -73,3 +73,88 @@ func (c *Client) Shutdown() mo.Result[struct{}] {
func (c *Client) Close() error {
return c.monitor.Disconnect()
}
// AddChardev adds a vhost-user-fs chardev for a virtiofsd socket.
// This is the first step of hot-mounting a filesystem.
func (c *Client) AddChardev(id, socketPath string) mo.Result[struct{}] {
cmd := fmt.Sprintf(`{"execute":"chardev-add","arguments":{"id":"%s","backend":{"type":"socket","data":{"addr":{"type":"unix","data":{"path":"%s"}},"server":false}}}}`, id, socketPath)
raw, err := c.monitor.Run([]byte(cmd))
if err != nil {
return mo.Err[struct{}](fmt.Errorf("failed to add chardev %s: %w", id, err))
}
// Check for error in response
var resp map[string]interface{}
if err := json.Unmarshal(raw, &resp); err == nil {
if errObj, ok := resp["error"]; ok {
return mo.Err[struct{}](fmt.Errorf("QMP error adding chardev: %v", errObj))
}
}
return mo.Ok(struct{}{})
}
// AddVhostUserFsDevice adds a vhost-user-fs-pci device connected to a chardev.
// This is the second step of hot-mounting a filesystem.
func (c *Client) AddVhostUserFsDevice(chardevID, tag string) mo.Result[struct{}] {
// Device ID must be unique, derive from chardev ID
deviceID := "dev_" + chardevID
cmd := fmt.Sprintf(`{"execute":"device_add","arguments":{"driver":"vhost-user-fs-pci","chardev":"%s","tag":"%s","id":"%s","queue-size":1024}}`, chardevID, tag, deviceID)
raw, err := c.monitor.Run([]byte(cmd))
if err != nil {
return mo.Err[struct{}](fmt.Errorf("failed to add vhost-user-fs device for %s: %w", tag, err))
}
// Check for error in response
var resp map[string]interface{}
if err := json.Unmarshal(raw, &resp); err == nil {
if errObj, ok := resp["error"]; ok {
return mo.Err[struct{}](fmt.Errorf("QMP error adding device: %v", errObj))
}
}
return mo.Ok(struct{}{})
}
// HotMountFilesystem performs a complete hot-mount of a virtiofsd filesystem.
// Requires the virtiofsd daemon to already be running and listening on socketPath.
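// Note: this only attaches the device on the host side; the guest still has to
// mount the tag (cmd/qvm/run.go does this with `mount -t virtiofs <tag> <dir>`).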
func (c *Client) HotMountFilesystem(tag, socketPath string) mo.Result[struct{}] {
// Use tag as chardev ID for simplicity
chardevID := tag
// Step 1: Add chardev
if result := c.AddChardev(chardevID, socketPath); result.IsError() {
return result
}
// Step 2: Add device
if result := c.AddVhostUserFsDevice(chardevID, tag); result.IsError() {
return result
}
return mo.Ok(struct{}{})
}
// QueryChardevs lists all current chardevs to check if one exists.
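// vm.IsWorkspaceMounted relies on this to detect whether a workspace tag is already attached.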
func (c *Client) QueryChardevs() mo.Result[[]string] {
cmd := []byte(`{"execute":"query-chardev"}`)
raw, err := c.monitor.Run(cmd)
if err != nil {
return mo.Err[[]string](fmt.Errorf("failed to query chardevs: %w", err))
}
var resp struct {
Return []struct {
Label string `json:"label"`
} `json:"return"`
}
if err := json.Unmarshal(raw, &resp); err != nil {
return mo.Err[[]string](fmt.Errorf("failed to parse chardev response: %w", err))
}
var labels []string
for _, cd := range resp.Return {
labels = append(labels, cd.Label)
}
return mo.Ok(labels)
}

View file

@@ -5,7 +5,10 @@ import (
"os"
"os/exec"
"qvm/internal/config"
+"qvm/internal/lock"
"qvm/internal/logging"
+"qvm/internal/qmp"
+"qvm/internal/virtiofsd"
"qvm/internal/workspace"
"strconv"
"strings"
@@ -22,45 +25,48 @@ type VMStatus struct {
SSHPort int
}
-// Mount represents a 9p filesystem mount
-type Mount struct {
-Tag string
-HostPath string
-}
-// Start launches the VM with all configured mounts.
+// Start launches the VM with all configured mounts using virtiofsd.
// Sequence:
-// 1. Check if VM is already running (via PID file and process check)
-// 2. Ensure all required directories exist
-// 3. Build mount list (cache mounts + workspace mounts from registry)
-// 4. Find available SSH port
-// 5. Build and start VM via runner script with 9p virtfs mounts
-// 6. Write PID and SSH port to state files
-// 7. Wait for SSH to become available (60 second timeout)
+// 1. Acquire exclusive lock (prevents race conditions from concurrent qvm run)
+// 2. Check if VM is already running (via PID file and process check)
+// 3. Clean up any stale state files from previous failed starts
+// 4. Ensure all required directories exist
+// 5. Start virtiofsd daemons for all mounts (cache + workspaces)
+// 6. Find available SSH port
+// 7. Build and start QEMU with vhost-user-fs-pci devices
+// 8. Write PID and SSH port to state files
+// 9. Wait for SSH to become available (120 second timeout)
//
// Returns error if any step fails.
func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] {
-// 1. Check if already running
+// 1. Acquire exclusive lock
+lockResult := lock.Acquire(30 * time.Second)
+if lockResult.IsError() {
+return mo.Err[struct{}](lockResult.Error())
+}
+vmLock := lockResult.MustGet()
+defer vmLock.Release()
+// 2. Check if already running
if IsRunning() {
return mo.Err[struct{}](fmt.Errorf("VM is already running"))
}
-// 2. Ensure directories exist
+// 3. Clean up any stale state files
+cleanupStateFiles()
+// 4. Ensure directories exist
if err := config.EnsureDirs(); err != nil {
return mo.Err[struct{}](fmt.Errorf("failed to create directories: %w", err))
}
-// 2a. Check if base image exists
+// 4a. Check if base image exists
if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) {
return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage))
}
-// 2b. Create overlay if it doesn't exist (backed by base image)
+// 4b. Create overlay if it doesn't exist (backed by base image)
if _, err := os.Stat(config.Overlay); os.IsNotExist(err) {
-if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) {
-return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage))
-}
logging.Info("Creating overlay image backed by base image...")
cmd := exec.Command("qemu-img", "create", "-f", "qcow2",
"-F", "qcow2", "-b", config.BaseImage, config.Overlay)
@@ -69,46 +75,49 @@ func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] {
}
}
-// 3. Build mount list (for 9p virtfs)
-mounts := []Mount{
-{Tag: "cargo_home", HostPath: config.CargoHome},
-{Tag: "cargo_target", HostPath: config.CargoTarget},
-{Tag: "pnpm_store", HostPath: config.PnpmStore},
-{Tag: "sccache", HostPath: config.Sccache},
-}
+// 5. Build mount list and start virtiofsd daemons
+mounts := virtiofsd.DefaultCacheMounts()
// Add opencode config mount if directory exists
if _, err := os.Stat(config.HostOpencodeConfig); err == nil {
-mounts = append(mounts, Mount{
+mounts = append(mounts, virtiofsd.Mount{
Tag: "opencode_config",
HostPath: config.HostOpencodeConfig,
+SocketPath: config.StateDir + "/opencode_config.sock",
})
}
// Add workspace mounts from registry
for _, ws := range reg.List() {
-mounts = append(mounts, Mount{
-Tag: ws.MountTag,
-HostPath: ws.HostPath,
-})
+mounts = append(mounts, virtiofsd.WorkspaceMount(ws.MountTag, ws.HostPath))
}
-// 4. Find available SSH port
+// Start virtiofsd manager
+vfsManager := virtiofsd.NewManager(config.StateDir)
+startResult := vfsManager.StartAll(mounts)
+if startResult.IsError() {
+return mo.Err[struct{}](fmt.Errorf("failed to start virtiofsd daemons: %w", startResult.Error()))
+}
+// 6. Find available SSH port
sshPort, err := findAvailablePort(2222)
if err != nil {
+vfsManager.StopAll()
return mo.Err[struct{}](fmt.Errorf("failed to find available SSH port: %w", err))
}
-// 5. Build QEMU command and start VM directly
-args := buildQEMUArgs(cfg, sshPort, mounts)
-cmd := exec.Command("qemu-system-x86_64", args...)
+// 7. Build QEMU command and start VM
+args := buildQEMUCommand(cfg, sshPort, mounts)
+logging.Info(fmt.Sprintf("Starting QEMU with %d mounts...", len(mounts)))
+cmd := exec.Command("qemu-system-x86_64", args...)
cmd.Stdout = nil
cmd.Stderr = nil
cmd.Stdin = nil
if err := cmd.Run(); err != nil {
-return mo.Err[struct{}](fmt.Errorf("failed to start QEMU: %w", err))
+vfsManager.StopAll()
+return mo.Err[struct{}](fmt.Errorf("failed to start QEMU: %w\n\nTry running 'qvm doctor' to diagnose the issue.", err))
}
logging.Info("Waiting for VM to daemonize...")
@@ -122,11 +131,13 @@ func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] {
}
if !pidFileReady {
-return mo.Err[struct{}](fmt.Errorf("QEMU did not create PID file after 5 seconds"))
+vfsManager.StopAll()
+return mo.Err[struct{}](fmt.Errorf("QEMU did not create PID file after 5 seconds\n\nTry running 'qvm doctor' to diagnose the issue."))
}
pidBytes, err := os.ReadFile(config.PIDFile)
if err != nil {
+vfsManager.StopAll()
return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err))
}
pid := strings.TrimSpace(string(pidBytes))
@@ -134,43 +145,121 @@ func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] {
logging.Info("VM started with PID " + pid)
if err := os.WriteFile(config.SSHPortFile, []byte(strconv.Itoa(sshPort)), 0644); err != nil {
-if pidBytes, err := os.ReadFile(config.PIDFile); err == nil {
-if pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))); err == nil {
-if process, err := os.FindProcess(pid); err == nil {
-_ = process.Kill()
-}
-}
-}
-_ = os.Remove(config.PIDFile)
+stopQEMU()
+vfsManager.StopAll()
return mo.Err[struct{}](fmt.Errorf("failed to write SSH port file: %w", err))
}
-// 7. Wait for SSH
+// 9. Wait for SSH
if err := waitForSSH(sshPort, 120*time.Second); err != nil {
-_ = cmd.Process.Kill()
-_ = os.Remove(config.PIDFile)
-_ = os.Remove(config.SSHPortFile)
-return mo.Err[struct{}](fmt.Errorf("VM started but SSH not available: %w", err))
+stopQEMU()
+vfsManager.StopAll()
+cleanupStateFiles()
+return mo.Err[struct{}](fmt.Errorf("VM started but SSH not available: %w\n\nTry running 'qvm doctor' to diagnose the issue.", err))
}
return mo.Ok(struct{}{})
}
-// Stop gracefully shuts down the VM.
// HotMountWorkspace mounts a new workspace into a running VM without restart.
// This uses QMP to dynamically add a vhost-user-fs-pci device.
func HotMountWorkspace(ws *workspace.Workspace) mo.Result[struct{}] {
if !IsRunning() {
return mo.Err[struct{}](fmt.Errorf("VM is not running"))
}
// Acquire lock to prevent concurrent hot-mounts
lockResult := lock.Acquire(10 * time.Second)
if lockResult.IsError() {
return mo.Err[struct{}](lockResult.Error())
}
vmLock := lockResult.MustGet()
defer vmLock.Release()
// Start virtiofsd for this workspace
mount := virtiofsd.WorkspaceMount(ws.MountTag, ws.HostPath)
vfsManager := virtiofsd.NewManager(config.StateDir)
startResult := vfsManager.StartMount(mount)
if startResult.IsError() {
return mo.Err[struct{}](fmt.Errorf("failed to start virtiofsd: %w", startResult.Error()))
}
// Connect to QMP and hot-add the device
qmpResult := qmp.Connect(config.QMPSocket)
if qmpResult.IsError() {
vfsManager.StopMount(mount)
return mo.Err[struct{}](fmt.Errorf("failed to connect to QMP: %w", qmpResult.Error()))
}
client := qmpResult.MustGet()
defer client.Close()
// Hot-mount the filesystem
hotMountResult := client.HotMountFilesystem(ws.MountTag, mount.SocketPath)
if hotMountResult.IsError() {
vfsManager.StopMount(mount)
return mo.Err[struct{}](fmt.Errorf("failed to hot-mount filesystem: %w", hotMountResult.Error()))
}
logging.Info(fmt.Sprintf("Hot-mounted workspace %s", ws.MountTag))
return mo.Ok(struct{}{})
}
// IsWorkspaceMounted checks if a workspace is already mounted in the running VM.
func IsWorkspaceMounted(ws *workspace.Workspace) bool {
if !IsRunning() {
return false
}
qmpResult := qmp.Connect(config.QMPSocket)
if qmpResult.IsError() {
return false
}
client := qmpResult.MustGet()
defer client.Close()
chardevsResult := client.QueryChardevs()
if chardevsResult.IsError() {
return false
}
for _, label := range chardevsResult.MustGet() {
if label == ws.MountTag {
return true
}
}
return false
}
+// Stop gracefully shuts down the VM and virtiofsd daemons.
// Sequence:
-// 1. Read PID from file
-// 2. Send SIGTERM to the process
-// 3. Wait up to 30 seconds for graceful shutdown (poll every second)
-// 4. If still running, send SIGKILL
-// 5. Clean up PID and port files
+// 1. Acquire lock
+// 2. Read PID from file
+// 3. Send SIGTERM to the process
+// 4. Wait up to 30 seconds for graceful shutdown
+// 5. If still running, send SIGKILL
+// 6. Stop all virtiofsd daemons
+// 7. Clean up state files
//
// Returns success even if VM is not running (idempotent).
func Stop() mo.Result[struct{}] {
-// 1. Read PID file
+// Acquire lock
+lockResult := lock.Acquire(30 * time.Second)
+if lockResult.IsError() {
+return mo.Err[struct{}](lockResult.Error())
+}
+vmLock := lockResult.MustGet()
+defer vmLock.Release()
+// Stop virtiofsd daemons first
+vfsManager := virtiofsd.NewManager(config.StateDir)
+vfsManager.StopAll()
+// Read PID file
pidBytes, err := os.ReadFile(config.PIDFile)
if err != nil {
if os.IsNotExist(err) {
-// Not running
+cleanupStateFiles()
return mo.Ok(struct{}{})
}
return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err))
@@ -178,48 +267,55 @@ func Stop() mo.Result[struct{}] {
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
if err != nil {
+cleanupStateFiles()
return mo.Err[struct{}](fmt.Errorf("invalid PID in file: %w", err))
}
// Check if process exists
process, err := os.FindProcess(pid)
if err != nil {
-// Process doesn't exist, clean up
cleanupStateFiles()
return mo.Ok(struct{}{})
}
-// 2. Send SIGTERM for graceful shutdown
+// Send SIGTERM for graceful shutdown
if err := process.Signal(syscall.SIGTERM); err != nil {
-// Process already gone
cleanupStateFiles()
return mo.Ok(struct{}{})
}
-// 3. Wait up to 30 seconds for process to exit (poll every second)
+// Wait up to 30 seconds for process to exit
for i := 0; i < 30; i++ {
time.Sleep(1 * time.Second)
-// Check if process still exists by sending signal 0
if err := process.Signal(syscall.Signal(0)); err != nil {
-// Process no longer exists
cleanupStateFiles()
return mo.Ok(struct{}{})
}
}
-// 4. Timeout, force kill
+// Timeout, force kill
_ = process.Signal(syscall.SIGKILL)
-// Wait a moment for SIGKILL to take effect
time.Sleep(1 * time.Second)
-// 5. Clean up state files
cleanupStateFiles()
return mo.Ok(struct{}{})
}
// stopQEMU stops just the QEMU process (used during failed starts)
func stopQEMU() {
pidBytes, err := os.ReadFile(config.PIDFile)
if err != nil {
return
}
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
if err != nil {
return
}
if process, err := os.FindProcess(pid); err == nil {
_ = process.Kill()
}
}
// cleanupStateFiles removes all VM state files
func cleanupStateFiles() {
_ = os.Remove(config.PIDFile)
@@ -239,7 +335,6 @@ func Status() mo.Result[VMStatus] {
return mo.Ok(status)
}
-// Read PID
pidBytes, err := os.ReadFile(config.PIDFile)
if err != nil {
return mo.Err[VMStatus](fmt.Errorf("failed to read PID file: %w", err))
@@ -250,7 +345,6 @@ func Status() mo.Result[VMStatus] {
return mo.Err[VMStatus](fmt.Errorf("invalid PID in file: %w", err))
}
-// Read SSH port
portBytes, err := os.ReadFile(config.SSHPortFile)
if err != nil {
return mo.Err[VMStatus](fmt.Errorf("failed to read SSH port file: %w", err))
@@ -269,15 +363,12 @@ func Status() mo.Result[VMStatus] {
}
// Reset stops the VM and deletes the overlay image.
-// This returns the VM to a fresh state based on the base image.
func Reset() mo.Result[struct{}] {
-// Stop VM if running
stopResult := Stop()
if stopResult.IsError() {
return mo.Err[struct{}](fmt.Errorf("failed to stop VM: %w", stopResult.Error()))
}
-// Delete overlay image
if err := os.Remove(config.Overlay); err != nil && !os.IsNotExist(err) {
return mo.Err[struct{}](fmt.Errorf("failed to delete overlay: %w", err))
}
@@ -285,8 +376,7 @@ func Reset() mo.Result[struct{}] {
return mo.Ok(struct{}{})
}
-// IsRunning performs a quick check if the VM is running by checking
-// the PID file and verifying the process exists.
+// IsRunning performs a quick check if the VM is running.
func IsRunning() bool {
pidBytes, err := os.ReadFile(config.PIDFile)
if err != nil {
@@ -298,7 +388,6 @@ func IsRunning() bool {
return false
}
-// Check if process exists by sending signal 0
process, err := os.FindProcess(pid)
if err != nil {
return false
@@ -308,35 +397,6 @@ func IsRunning() bool {
return err == nil
}
-func buildQEMUArgs(cfg *config.Config, sshPort int, mounts []Mount) []string {
-// Boot directly from the qcow2 disk image (has GRUB installed)
-// Do NOT use -kernel/-initrd - that's for NixOS VM runner which requires special 9p mounts
-args := []string{
-"-machine", "q35",
-"-accel", "kvm",
-"-cpu", "host",
-"-m", cfg.VM.Memory,
-"-smp", strconv.Itoa(cfg.VM.CPUs),
-"-display", "none",
-"-daemonize",
-"-pidfile", config.PIDFile,
-"-drive", fmt.Sprintf("file=%s,if=virtio,format=qcow2", config.Overlay),
-"-netdev", fmt.Sprintf("user,id=n0,hostfwd=tcp::%d-:22", sshPort),
-"-device", "virtio-net-pci,netdev=n0",
-"-serial", fmt.Sprintf("file:%s", config.SerialLog),
-}
-// Add 9p mounts for cache directories and workspaces
-for _, mount := range mounts {
-args = append(args,
-"-virtfs", fmt.Sprintf("local,path=%s,mount_tag=%s,security_model=mapped-xattr,id=%s",
-mount.HostPath, mount.Tag, mount.Tag),
-)
-}
-return args
-}
// findAvailablePort finds an available TCP port starting from the given base port.
func findAvailablePort(basePort int) (int, error) {
const maxAttempts = 100
@@ -354,7 +414,6 @@ func findAvailablePort(basePort int) (int, error) {
}
// waitForSSH waits for SSH to become available on the given port.
-// Uses sshpass with password 'root' to test connection.
func waitForSSH(port int, timeout time.Duration) error {
deadline := time.Now().Add(timeout)

View file

@@ -7,42 +7,30 @@ import (
"strconv"
)
+// buildQEMUCommand builds the QEMU command line for virtiofsd-based mounts.
+// Uses vhost-user-fs-pci devices which support hot-plugging.
func buildQEMUCommand(cfg *config.Config, sshPort int, mounts []virtiofsd.Mount) []string {
-args := []string{
-"qemu-system-x86_64",
-"-enable-kvm",
-}
memSize := cfg.VM.Memory
-args = append(args,
+// vhost-user-fs requires shared memory backend
+args := []string{
+"-machine", "q35",
+"-accel", "kvm",
+"-cpu", "host",
"-object", fmt.Sprintf("memory-backend-memfd,id=mem,size=%s,share=on", memSize),
"-numa", "node,memdev=mem",
-)
-args = append(args,
"-smp", strconv.Itoa(cfg.VM.CPUs),
-)
-args = append(args,
-"-drive", fmt.Sprintf("if=virtio,file=%s,format=qcow2", config.Overlay),
-)
-args = append(args,
-"-nic", fmt.Sprintf("user,model=virtio-net-pci,hostfwd=tcp::%d-:22", sshPort),
-)
-args = append(args,
-"-serial", fmt.Sprintf("file:%s", config.SerialLog),
-)
-args = append(args,
-"-qmp", fmt.Sprintf("unix:%s,server,nowait", config.QMPSocket),
-)
-args = append(args,
"-display", "none",
-)
+"-daemonize",
+"-pidfile", config.PIDFile,
+"-drive", fmt.Sprintf("file=%s,if=virtio,format=qcow2", config.Overlay),
+"-netdev", fmt.Sprintf("user,id=n0,hostfwd=tcp::%d-:22", sshPort),
+"-device", "virtio-net-pci,netdev=n0",
+"-serial", fmt.Sprintf("file:%s", config.SerialLog),
+"-qmp", fmt.Sprintf("unix:%s,server,nowait", config.QMPSocket),
+}
+// Add vhost-user-fs devices for each mount
for _, mount := range mounts {
args = append(args,
"-chardev", fmt.Sprintf("socket,id=%s,path=%s", mount.Tag, mount.SocketPath),