package vm

import (
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"syscall"
	"time"

	"qvm/internal/config"
	"qvm/internal/lock"
	"qvm/internal/logging"
	"qvm/internal/qmp"
	"qvm/internal/virtiofsd"
	"qvm/internal/workspace"

	"github.com/samber/mo"
)

// VMStatus represents the current state of the VM.
type VMStatus struct {
	Running bool
	PID     int
	SSHPort int
}

// Start launches the VM with all configured mounts using virtiofsd.
// Sequence:
// 1. Acquire exclusive lock (prevents race conditions from concurrent qvm run)
// 2. Check if VM is already running (via PID file and process check)
// 3. Clean up any stale state files from previous failed starts
// 4. Ensure all required directories exist
// 5. Start virtiofsd daemons for all mounts (cache + workspaces)
// 6. Find an available SSH port
// 7. Build and start QEMU with vhost-user-fs-pci devices
// 8. Wait for QEMU to daemonize and write its PID file, then record the SSH port
// 9. Wait for SSH to become available (120-second timeout)
//
// Returns an error if any step fails.
func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] {
	// 1. Acquire exclusive lock
	lockResult := lock.Acquire(30 * time.Second)
	if lockResult.IsError() {
		return mo.Err[struct{}](lockResult.Error())
	}
	vmLock := lockResult.MustGet()
	defer vmLock.Release()

	// 2. Check if already running
	if IsRunning() {
		return mo.Err[struct{}](fmt.Errorf("VM is already running"))
	}

	// 3. Clean up any stale state files
	cleanupStateFiles()

	// 4. Ensure directories exist
	if err := config.EnsureDirs(); err != nil {
		return mo.Err[struct{}](fmt.Errorf("failed to create directories: %w", err))
	}

	// 4a. Check that the base image exists
	if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) {
		return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage))
	}

	// 4b. Create the overlay if it doesn't exist (backed by the base image)
	if _, err := os.Stat(config.Overlay); os.IsNotExist(err) {
		logging.Info("Creating overlay image backed by base image...")
		cmd := exec.Command("qemu-img", "create",
			"-f", "qcow2",
			"-F", "qcow2",
			"-b", config.BaseImage,
			config.Overlay)
		if output, err := cmd.CombinedOutput(); err != nil {
			return mo.Err[struct{}](fmt.Errorf("failed to create overlay: %s: %w", string(output), err))
		}
	}

	// 5. Build mount list and start virtiofsd daemons
	mounts := virtiofsd.DefaultCacheMounts()

	// Add opencode config mount if the directory exists
	if _, err := os.Stat(config.HostOpencodeConfig); err == nil {
		mounts = append(mounts, virtiofsd.Mount{
			Tag:        "opencode_config",
			HostPath:   config.HostOpencodeConfig,
			SocketPath: config.StateDir + "/opencode_config.sock",
		})
	}

	// Add workspace mounts from the registry
	for _, ws := range reg.List() {
		mounts = append(mounts, virtiofsd.WorkspaceMount(ws.MountTag, ws.HostPath))
	}

	// Start virtiofsd manager
	vfsManager := virtiofsd.NewManager(config.StateDir)
	startResult := vfsManager.StartAll(mounts)
	if startResult.IsError() {
		return mo.Err[struct{}](fmt.Errorf("failed to start virtiofsd daemons: %w", startResult.Error()))
	}

	// 6. Find an available SSH port
	sshPort, err := findAvailablePort(2222)
	if err != nil {
		vfsManager.StopAll()
		return mo.Err[struct{}](fmt.Errorf("failed to find available SSH port: %w", err))
	}

	// 7. Build QEMU command and start VM
	args := buildQEMUCommand(cfg, sshPort, mounts)
	logging.Info(fmt.Sprintf("Starting QEMU with %d mounts...", len(mounts)))
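	// buildQEMUCommand is defined elsewhere in this package. The PID-file
	// wait below assumes it emits flags along these lines (illustrative
	// sketch only, not the verified flag set):
	//
	//	-daemonize -pidfile <config.PIDFile>
	//	-qmp unix:<config.QMPSocket>,server,nowait
	//	-netdev user,id=net0,hostfwd=tcp::<sshPort>-:22
	//	-chardev socket,id=<tag>,path=<mount.SocketPath>
	//	-device vhost-user-fs-pci,chardev=<tag>,tag=<tag>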
	cmd := exec.Command("qemu-system-x86_64", args...)
	cmd.Stdin = nil
	output, err := cmd.CombinedOutput()
	if err != nil {
		vfsManager.StopAll()
		if len(output) > 0 {
			return mo.Err[struct{}](fmt.Errorf("failed to start QEMU: %w\n\nQEMU output:\n%s", err, string(output)))
		}
		return mo.Err[struct{}](fmt.Errorf("failed to start QEMU: %w\n\nTry running 'qvm doctor' to diagnose the issue.", err))
	}

	// 8. Wait for QEMU to daemonize and write its PID file
	logging.Info("Waiting for VM to daemonize...")
	pidFileReady := false
	for i := 0; i < 10; i++ {
		time.Sleep(500 * time.Millisecond)
		if _, err := os.Stat(config.PIDFile); err == nil {
			pidFileReady = true
			break
		}
	}
	if !pidFileReady {
		vfsManager.StopAll()
		return mo.Err[struct{}](fmt.Errorf("QEMU did not create PID file after 5 seconds\n\nTry running 'qvm doctor' to diagnose the issue."))
	}

	pidBytes, err := os.ReadFile(config.PIDFile)
	if err != nil {
		vfsManager.StopAll()
		return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err))
	}
	pid := strings.TrimSpace(string(pidBytes))
	logging.Info("VM started with PID " + pid)

	// Record the SSH port so other commands can reach the VM
	if err := os.WriteFile(config.SSHPortFile, []byte(strconv.Itoa(sshPort)), 0644); err != nil {
		stopQEMU()
		vfsManager.StopAll()
		return mo.Err[struct{}](fmt.Errorf("failed to write SSH port file: %w", err))
	}

	// 9. Wait for SSH
	if err := waitForSSH(sshPort, 120*time.Second); err != nil {
		stopQEMU()
		vfsManager.StopAll()
		cleanupStateFiles()
		return mo.Err[struct{}](fmt.Errorf("VM started but SSH not available: %w\n\nTry running 'qvm doctor' to diagnose the issue.", err))
	}

	return mo.Ok(struct{}{})
}

// HotMountWorkspace mounts a new workspace into a running VM without a
// restart. It uses QMP to dynamically add a vhost-user-fs-pci device.
func HotMountWorkspace(ws *workspace.Workspace) mo.Result[struct{}] {
	if !IsRunning() {
		return mo.Err[struct{}](fmt.Errorf("VM is not running"))
	}

	// Acquire lock to prevent concurrent hot-mounts
	lockResult := lock.Acquire(10 * time.Second)
	if lockResult.IsError() {
		return mo.Err[struct{}](lockResult.Error())
	}
	vmLock := lockResult.MustGet()
	defer vmLock.Release()

	// Start virtiofsd for this workspace
	mount := virtiofsd.WorkspaceMount(ws.MountTag, ws.HostPath)
	vfsManager := virtiofsd.NewManager(config.StateDir)
	startResult := vfsManager.StartMount(mount)
	if startResult.IsError() {
		return mo.Err[struct{}](fmt.Errorf("failed to start virtiofsd: %w", startResult.Error()))
	}

	// Connect to QMP and hot-add the device
	qmpResult := qmp.Connect(config.QMPSocket)
	if qmpResult.IsError() {
		vfsManager.StopMount(mount)
		return mo.Err[struct{}](fmt.Errorf("failed to connect to QMP: %w", qmpResult.Error()))
	}
	client := qmpResult.MustGet()
	defer client.Close()

	// Hot-mount the filesystem
	hotMountResult := client.HotMountFilesystem(ws.MountTag, mount.SocketPath)
	if hotMountResult.IsError() {
		vfsManager.StopMount(mount)
		return mo.Err[struct{}](fmt.Errorf("failed to hot-mount filesystem: %w", hotMountResult.Error()))
	}

	logging.Info(fmt.Sprintf("Hot-mounted workspace %s", ws.MountTag))
	return mo.Ok(struct{}{})
}
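// On the wire, hot-adding a virtiofs device over QMP boils down to two
// commands, roughly as follows (illustrative sketch; the exact exchange
// lives in the qmp package's HotMountFilesystem):
//
//	{"execute": "chardev-add", "arguments": {"id": "<tag>",
//	  "backend": {"type": "socket", "data": {"addr": {"type": "unix", "data": {"path": "<socket>"}}}}}}
//	{"execute": "device_add", "arguments": {"driver": "vhost-user-fs-pci",
//	  "chardev": "<tag>", "tag": "<tag>"}}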
// IsWorkspaceMounted checks if a workspace is already mounted in the running VM.
func IsWorkspaceMounted(ws *workspace.Workspace) bool {
	if !IsRunning() {
		return false
	}

	qmpResult := qmp.Connect(config.QMPSocket)
	if qmpResult.IsError() {
		return false
	}
	client := qmpResult.MustGet()
	defer client.Close()

	chardevsResult := client.QueryChardevs()
	if chardevsResult.IsError() {
		return false
	}
	for _, label := range chardevsResult.MustGet() {
		if label == ws.MountTag {
			return true
		}
	}
	return false
}

// Stop gracefully shuts down the VM and virtiofsd daemons.
// Sequence:
// 1. Acquire lock
// 2. Stop all virtiofsd daemons
// 3. Read PID from file
// 4. Send SIGTERM to the process
// 5. Wait up to 30 seconds for graceful shutdown
// 6. If still running, send SIGKILL
// 7. Clean up state files
//
// Returns success even if the VM is not running (idempotent).
func Stop() mo.Result[struct{}] {
	// 1. Acquire lock
	lockResult := lock.Acquire(30 * time.Second)
	if lockResult.IsError() {
		return mo.Err[struct{}](lockResult.Error())
	}
	vmLock := lockResult.MustGet()
	defer vmLock.Release()

	// 2. Stop virtiofsd daemons first
	vfsManager := virtiofsd.NewManager(config.StateDir)
	vfsManager.StopAll()

	// 3. Read PID file
	pidBytes, err := os.ReadFile(config.PIDFile)
	if err != nil {
		if os.IsNotExist(err) {
			cleanupStateFiles()
			return mo.Ok(struct{}{})
		}
		return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err))
	}
	pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
	if err != nil {
		cleanupStateFiles()
		return mo.Err[struct{}](fmt.Errorf("invalid PID in file: %w", err))
	}

	// Check if the process exists
	process, err := os.FindProcess(pid)
	if err != nil {
		cleanupStateFiles()
		return mo.Ok(struct{}{})
	}

	// 4. Send SIGTERM for graceful shutdown
	if err := process.Signal(syscall.SIGTERM); err != nil {
		cleanupStateFiles()
		return mo.Ok(struct{}{})
	}

	// 5. Wait up to 30 seconds for the process to exit
	for i := 0; i < 30; i++ {
		time.Sleep(1 * time.Second)
		if err := process.Signal(syscall.Signal(0)); err != nil {
			cleanupStateFiles()
			return mo.Ok(struct{}{})
		}
	}

	// 6. Timeout: force kill
	_ = process.Signal(syscall.SIGKILL)
	time.Sleep(1 * time.Second)
	cleanupStateFiles()
	return mo.Ok(struct{}{})
}

// stopQEMU stops just the QEMU process (used during failed starts).
func stopQEMU() {
	pidBytes, err := os.ReadFile(config.PIDFile)
	if err != nil {
		return
	}
	pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
	if err != nil {
		return
	}
	if process, err := os.FindProcess(pid); err == nil {
		_ = process.Kill()
	}
}

// cleanupStateFiles removes all VM state files.
func cleanupStateFiles() {
	_ = os.Remove(config.PIDFile)
	_ = os.Remove(config.SSHPortFile)
	_ = os.Remove(config.QMPSocket)
}

// Status returns the current VM status (running, PID, SSH port).
func Status() mo.Result[VMStatus] {
	status := VMStatus{
		Running: false,
		PID:     0,
		SSHPort: 0,
	}
	if !IsRunning() {
		return mo.Ok(status)
	}

	pidBytes, err := os.ReadFile(config.PIDFile)
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("failed to read PID file: %w", err))
	}
	pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("invalid PID in file: %w", err))
	}

	portBytes, err := os.ReadFile(config.SSHPortFile)
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("failed to read SSH port file: %w", err))
	}
	sshPort, err := strconv.Atoi(strings.TrimSpace(string(portBytes)))
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("invalid SSH port in file: %w", err))
	}

	status.Running = true
	status.PID = pid
	status.SSHPort = sshPort
	return mo.Ok(status)
}

// Reset stops the VM and deletes the overlay image.
func Reset() mo.Result[struct{}] {
	stopResult := Stop()
	if stopResult.IsError() {
		return mo.Err[struct{}](fmt.Errorf("failed to stop VM: %w", stopResult.Error()))
	}
	if err := os.Remove(config.Overlay); err != nil && !os.IsNotExist(err) {
		return mo.Err[struct{}](fmt.Errorf("failed to delete overlay: %w", err))
	}
	return mo.Ok(struct{}{})
}
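// Example caller flow for Status (illustrative; uses the standard Result
// accessors from github.com/samber/mo):
//
//	if res := vm.Status(); res.IsOk() && res.MustGet().Running {
//		st := res.MustGet()
//		fmt.Printf("VM running: pid=%d ssh=%d\n", st.PID, st.SSHPort)
//	}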
// IsRunning performs a quick check of whether the VM is running: it reads
// the PID file and sends signal 0 to the process, which tests for existence
// without delivering a signal.
func IsRunning() bool {
	pidBytes, err := os.ReadFile(config.PIDFile)
	if err != nil {
		return false
	}
	pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
	if err != nil {
		return false
	}
	process, err := os.FindProcess(pid)
	if err != nil {
		return false
	}
	err = process.Signal(syscall.Signal(0))
	return err == nil
}

// findAvailablePort finds an available TCP port starting from the given base
// port. It probes with nc(1); a failed probe means nothing is listening, so
// the port is treated as free. Note that if nc itself is missing, every probe
// fails and the base port is returned unchecked.
func findAvailablePort(basePort int) (int, error) {
	const maxAttempts = 100
	for i := 0; i < maxAttempts; i++ {
		port := basePort + i
		cmd := exec.Command("nc", "-z", "localhost", strconv.Itoa(port))
		if err := cmd.Run(); err != nil {
			return port, nil
		}
	}
	return 0, fmt.Errorf("could not find available port after %d attempts", maxAttempts)
}

// waitForSSH waits for SSH to become available on the given port.
func waitForSSH(port int, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		cmd := exec.Command("sshpass", "-p", "root", "ssh",
			"-o", "StrictHostKeyChecking=no",
			"-o", "UserKnownHostsFile=/dev/null",
			"-o", "ConnectTimeout=1",
			"-p", strconv.Itoa(port),
			"root@localhost", "exit 0")
		if err := cmd.Run(); err == nil {
			return nil
		}
		time.Sleep(1 * time.Second)
	}
	return fmt.Errorf("SSH did not become available within %v", timeout)
}
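// A dependency-free alternative to the nc probe in findAvailablePort
// (illustrative sketch, not wired in): bind each candidate port with
// net.Listen and treat a successful bind as "available". This avoids the
// false positive when nc is not installed; it would require importing "net".
//
//	l, err := net.Listen("tcp", "localhost:"+strconv.Itoa(port))
//	if err != nil {
//		continue // port already in use
//	}
//	_ = l.Close()
//	return port, nil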