package vm import ( "fmt" "os" "os/exec" "qvm/internal/config" "qvm/internal/logging" "qvm/internal/workspace" "strconv" "strings" "syscall" "time" "github.com/samber/mo" ) // VMStatus represents the current state of the VM type VMStatus struct { Running bool PID int SSHPort int } // Mount represents a 9p filesystem mount type Mount struct { Tag string HostPath string } // Start launches the VM with all configured mounts. // Sequence: // 1. Check if VM is already running (via PID file and process check) // 2. Ensure all required directories exist // 3. Build mount list (cache mounts + workspace mounts from registry) // 4. Find available SSH port // 5. Build and start VM via runner script with 9p virtfs mounts // 6. Write PID and SSH port to state files // 7. Wait for SSH to become available (60 second timeout) // // Returns error if any step fails. func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] { // 1. Check if already running if IsRunning() { return mo.Err[struct{}](fmt.Errorf("VM is already running")) } // 2. Ensure directories exist if err := config.EnsureDirs(); err != nil { return mo.Err[struct{}](fmt.Errorf("failed to create directories: %w", err)) } // 2a. Check if base image exists if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) { return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage)) } // 2b. 
Create overlay if it doesn't exist (backed by base image) if _, err := os.Stat(config.Overlay); os.IsNotExist(err) { if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) { return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage)) } logging.Info("Creating overlay image backed by base image...") cmd := exec.Command("qemu-img", "create", "-f", "qcow2", "-F", "qcow2", "-b", config.BaseImage, config.Overlay) if output, err := cmd.CombinedOutput(); err != nil { return mo.Err[struct{}](fmt.Errorf("failed to create overlay: %s: %w", string(output), err)) } } // 3. Build mount list (for 9p virtfs) mounts := []Mount{ {Tag: "cargo_home", HostPath: config.CargoHome}, {Tag: "cargo_target", HostPath: config.CargoTarget}, {Tag: "pnpm_store", HostPath: config.PnpmStore}, {Tag: "sccache", HostPath: config.Sccache}, } // Add opencode config mount if directory exists if _, err := os.Stat(config.HostOpencodeConfig); err == nil { mounts = append(mounts, Mount{ Tag: "opencode_config", HostPath: config.HostOpencodeConfig, }) } // Add workspace mounts from registry for _, ws := range reg.List() { mounts = append(mounts, Mount{ Tag: ws.MountTag, HostPath: ws.HostPath, }) } // 4. Find available SSH port sshPort, err := findAvailablePort(2222) if err != nil { return mo.Err[struct{}](fmt.Errorf("failed to find available SSH port: %w", err)) } // 5. Build QEMU command and start VM directly args := buildQEMUArgs(cfg, sshPort, mounts) cmd := exec.Command("qemu-system-x86_64", args...) 
cmd.Stdout = nil cmd.Stderr = nil cmd.Stdin = nil if err := cmd.Run(); err != nil { return mo.Err[struct{}](fmt.Errorf("failed to start QEMU: %w", err)) } logging.Info("Waiting for VM to daemonize...") pidFileReady := false for i := 0; i < 10; i++ { time.Sleep(500 * time.Millisecond) if _, err := os.Stat(config.PIDFile); err == nil { pidFileReady = true break } } if !pidFileReady { return mo.Err[struct{}](fmt.Errorf("QEMU did not create PID file after 5 seconds")) } pidBytes, err := os.ReadFile(config.PIDFile) if err != nil { return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err)) } pid := strings.TrimSpace(string(pidBytes)) logging.Info("VM started with PID " + pid) if err := os.WriteFile(config.SSHPortFile, []byte(strconv.Itoa(sshPort)), 0644); err != nil { if pidBytes, err := os.ReadFile(config.PIDFile); err == nil { if pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))); err == nil { if process, err := os.FindProcess(pid); err == nil { _ = process.Kill() } } } _ = os.Remove(config.PIDFile) return mo.Err[struct{}](fmt.Errorf("failed to write SSH port file: %w", err)) } // 7. Wait for SSH if err := waitForSSH(sshPort, 120*time.Second); err != nil { _ = cmd.Process.Kill() _ = os.Remove(config.PIDFile) _ = os.Remove(config.SSHPortFile) return mo.Err[struct{}](fmt.Errorf("VM started but SSH not available: %w", err)) } return mo.Ok(struct{}{}) } // Stop gracefully shuts down the VM. // Sequence: // 1. Read PID from file // 2. Send SIGTERM to the process // 3. Wait up to 30 seconds for graceful shutdown (poll every second) // 4. If still running, send SIGKILL // 5. Clean up PID and port files // // Returns success even if VM is not running (idempotent). func Stop() mo.Result[struct{}] { // 1. 
Read PID file pidBytes, err := os.ReadFile(config.PIDFile) if err != nil { if os.IsNotExist(err) { // Not running return mo.Ok(struct{}{}) } return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err)) } pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))) if err != nil { return mo.Err[struct{}](fmt.Errorf("invalid PID in file: %w", err)) } // Check if process exists process, err := os.FindProcess(pid) if err != nil { // Process doesn't exist, clean up cleanupStateFiles() return mo.Ok(struct{}{}) } // 2. Send SIGTERM for graceful shutdown if err := process.Signal(syscall.SIGTERM); err != nil { // Process already gone cleanupStateFiles() return mo.Ok(struct{}{}) } // 3. Wait up to 30 seconds for process to exit (poll every second) for i := 0; i < 30; i++ { time.Sleep(1 * time.Second) // Check if process still exists by sending signal 0 if err := process.Signal(syscall.Signal(0)); err != nil { // Process no longer exists cleanupStateFiles() return mo.Ok(struct{}{}) } } // 4. Timeout, force kill _ = process.Signal(syscall.SIGKILL) // Wait a moment for SIGKILL to take effect time.Sleep(1 * time.Second) // 5. Clean up state files cleanupStateFiles() return mo.Ok(struct{}{}) } // cleanupStateFiles removes all VM state files func cleanupStateFiles() { _ = os.Remove(config.PIDFile) _ = os.Remove(config.SSHPortFile) _ = os.Remove(config.QMPSocket) } // Status returns the current VM status (running, PID, SSH port). 
func Status() mo.Result[VMStatus] { status := VMStatus{ Running: false, PID: 0, SSHPort: 0, } if !IsRunning() { return mo.Ok(status) } // Read PID pidBytes, err := os.ReadFile(config.PIDFile) if err != nil { return mo.Err[VMStatus](fmt.Errorf("failed to read PID file: %w", err)) } pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))) if err != nil { return mo.Err[VMStatus](fmt.Errorf("invalid PID in file: %w", err)) } // Read SSH port portBytes, err := os.ReadFile(config.SSHPortFile) if err != nil { return mo.Err[VMStatus](fmt.Errorf("failed to read SSH port file: %w", err)) } sshPort, err := strconv.Atoi(strings.TrimSpace(string(portBytes))) if err != nil { return mo.Err[VMStatus](fmt.Errorf("invalid SSH port in file: %w", err)) } status.Running = true status.PID = pid status.SSHPort = sshPort return mo.Ok(status) } // Reset stops the VM and deletes the overlay image. // This returns the VM to a fresh state based on the base image. func Reset() mo.Result[struct{}] { // Stop VM if running stopResult := Stop() if stopResult.IsError() { return mo.Err[struct{}](fmt.Errorf("failed to stop VM: %w", stopResult.Error())) } // Delete overlay image if err := os.Remove(config.Overlay); err != nil && !os.IsNotExist(err) { return mo.Err[struct{}](fmt.Errorf("failed to delete overlay: %w", err)) } return mo.Ok(struct{}{}) } // IsRunning performs a quick check if the VM is running by checking // the PID file and verifying the process exists. 
func IsRunning() bool { pidBytes, err := os.ReadFile(config.PIDFile) if err != nil { return false } pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))) if err != nil { return false } // Check if process exists by sending signal 0 process, err := os.FindProcess(pid) if err != nil { return false } err = process.Signal(syscall.Signal(0)) return err == nil } func buildQEMUArgs(cfg *config.Config, sshPort int, mounts []Mount) []string { // Boot directly from the qcow2 disk image (has GRUB installed) // Do NOT use -kernel/-initrd - that's for NixOS VM runner which requires special 9p mounts args := []string{ "-machine", "q35", "-accel", "kvm", "-cpu", "host", "-m", cfg.VM.Memory, "-smp", strconv.Itoa(cfg.VM.CPUs), "-display", "none", "-daemonize", "-pidfile", config.PIDFile, "-drive", fmt.Sprintf("file=%s,if=virtio,format=qcow2", config.Overlay), "-netdev", fmt.Sprintf("user,id=n0,hostfwd=tcp::%d-:22", sshPort), "-device", "virtio-net-pci,netdev=n0", "-serial", fmt.Sprintf("file:%s", config.SerialLog), } // Add 9p mounts for cache directories and workspaces for _, mount := range mounts { args = append(args, "-virtfs", fmt.Sprintf("local,path=%s,mount_tag=%s,security_model=mapped-xattr,id=%s", mount.HostPath, mount.Tag, mount.Tag), ) } return args } // findAvailablePort finds an available TCP port starting from the given base port. func findAvailablePort(basePort int) (int, error) { const maxAttempts = 100 for i := 0; i < maxAttempts; i++ { port := basePort + i cmd := exec.Command("nc", "-z", "localhost", strconv.Itoa(port)) if err := cmd.Run(); err != nil { return port, nil } } return 0, fmt.Errorf("could not find available port after %d attempts", maxAttempts) } // waitForSSH waits for SSH to become available on the given port. // Uses sshpass with password 'root' to test connection. 
func waitForSSH(port int, timeout time.Duration) error {
	// Fail fast when the probe cannot possibly run: without these binaries
	// every attempt would fail instantly and the caller would wait the whole
	// timeout only to get a misleading "SSH did not become available" error.
	for _, tool := range []string{"sshpass", "ssh"} {
		if _, err := exec.LookPath(tool); err != nil {
			return fmt.Errorf("%s is required to check SSH availability: %w", tool, err)
		}
	}

	deadline := time.Now().Add(timeout)

	for time.Now().Before(deadline) {
		// Host key checking is disabled and known hosts are discarded so the
		// probe never prompts; "exit 0" makes a successful login return at once.
		cmd := exec.Command("sshpass", "-p", "root",
			"ssh",
			"-o", "StrictHostKeyChecking=no",
			"-o", "UserKnownHostsFile=/dev/null",
			"-o", "ConnectTimeout=1",
			"-p", strconv.Itoa(port),
			"root@localhost",
			"exit 0")

		if err := cmd.Run(); err == nil {
			return nil
		}

		// Poll once per second until the deadline passes.
		time.Sleep(1 * time.Second)
	}

	return fmt.Errorf("SSH did not become available within %v", timeout)
}