379 lines
10 KiB
Go
379 lines
10 KiB
Go
package vm
|
|
|
|
import (
	"fmt"
	"net"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/samber/mo"

	"qvm/internal/config"
	"qvm/internal/logging"
	"qvm/internal/workspace"
)
|
|
|
|
// VMStatus represents the current state of the VM
type VMStatus struct {
	Running bool // true when the process recorded in the PID file is alive
	PID     int  // QEMU process ID read from the PID file (0 when not running)
	SSHPort int  // host port forwarded to guest port 22 (0 when not running)
}
|
|
|
|
// Mount represents a 9p filesystem mount
type Mount struct {
	Tag      string // 9p mount tag passed to QEMU's -virtfs; identifies the share in the guest
	HostPath string // host directory exported to the guest via virtfs
}
|
|
|
|
// Start launches the VM with all configured mounts.
|
|
// Sequence:
|
|
// 1. Check if VM is already running (via PID file and process check)
|
|
// 2. Ensure all required directories exist
|
|
// 3. Build mount list (cache mounts + workspace mounts from registry)
|
|
// 4. Find available SSH port
|
|
// 5. Build and start VM via runner script with 9p virtfs mounts
|
|
// 6. Write PID and SSH port to state files
|
|
// 7. Wait for SSH to become available (60 second timeout)
|
|
//
|
|
// Returns error if any step fails.
|
|
func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] {
|
|
// 1. Check if already running
|
|
if IsRunning() {
|
|
return mo.Err[struct{}](fmt.Errorf("VM is already running"))
|
|
}
|
|
|
|
// 2. Ensure directories exist
|
|
if err := config.EnsureDirs(); err != nil {
|
|
return mo.Err[struct{}](fmt.Errorf("failed to create directories: %w", err))
|
|
}
|
|
|
|
// 2a. Check if base image exists
|
|
if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) {
|
|
return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage))
|
|
}
|
|
|
|
// 2b. Create overlay if it doesn't exist (backed by base image)
|
|
if _, err := os.Stat(config.Overlay); os.IsNotExist(err) {
|
|
if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) {
|
|
return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage))
|
|
}
|
|
|
|
logging.Info("Creating overlay image backed by base image...")
|
|
cmd := exec.Command("qemu-img", "create", "-f", "qcow2",
|
|
"-F", "qcow2", "-b", config.BaseImage, config.Overlay)
|
|
if output, err := cmd.CombinedOutput(); err != nil {
|
|
return mo.Err[struct{}](fmt.Errorf("failed to create overlay: %s: %w", string(output), err))
|
|
}
|
|
}
|
|
|
|
// 3. Build mount list (for 9p virtfs)
|
|
mounts := []Mount{
|
|
{Tag: "cargo_home", HostPath: config.CargoHome},
|
|
{Tag: "cargo_target", HostPath: config.CargoTarget},
|
|
{Tag: "pnpm_store", HostPath: config.PnpmStore},
|
|
{Tag: "sccache", HostPath: config.Sccache},
|
|
}
|
|
|
|
// Add opencode config mount if directory exists
|
|
if _, err := os.Stat(config.HostOpencodeConfig); err == nil {
|
|
mounts = append(mounts, Mount{
|
|
Tag: "opencode_config",
|
|
HostPath: config.HostOpencodeConfig,
|
|
})
|
|
}
|
|
|
|
// Add workspace mounts from registry
|
|
for _, ws := range reg.List() {
|
|
mounts = append(mounts, Mount{
|
|
Tag: ws.MountTag,
|
|
HostPath: ws.HostPath,
|
|
})
|
|
}
|
|
|
|
// 4. Find available SSH port
|
|
sshPort, err := findAvailablePort(2222)
|
|
if err != nil {
|
|
return mo.Err[struct{}](fmt.Errorf("failed to find available SSH port: %w", err))
|
|
}
|
|
|
|
// 5. Build QEMU command and start VM directly
|
|
args := buildQEMUArgs(cfg, sshPort, mounts)
|
|
cmd := exec.Command("qemu-system-x86_64", args...)
|
|
|
|
cmd.Stdout = nil
|
|
cmd.Stderr = nil
|
|
cmd.Stdin = nil
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
return mo.Err[struct{}](fmt.Errorf("failed to start QEMU: %w", err))
|
|
}
|
|
|
|
logging.Info("Waiting for VM to daemonize...")
|
|
pidFileReady := false
|
|
for i := 0; i < 10; i++ {
|
|
time.Sleep(500 * time.Millisecond)
|
|
if _, err := os.Stat(config.PIDFile); err == nil {
|
|
pidFileReady = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if !pidFileReady {
|
|
return mo.Err[struct{}](fmt.Errorf("QEMU did not create PID file after 5 seconds"))
|
|
}
|
|
|
|
pidBytes, err := os.ReadFile(config.PIDFile)
|
|
if err != nil {
|
|
return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err))
|
|
}
|
|
pid := strings.TrimSpace(string(pidBytes))
|
|
|
|
logging.Info("VM started with PID " + pid)
|
|
|
|
if err := os.WriteFile(config.SSHPortFile, []byte(strconv.Itoa(sshPort)), 0644); err != nil {
|
|
if pidBytes, err := os.ReadFile(config.PIDFile); err == nil {
|
|
if pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))); err == nil {
|
|
if process, err := os.FindProcess(pid); err == nil {
|
|
_ = process.Kill()
|
|
}
|
|
}
|
|
}
|
|
_ = os.Remove(config.PIDFile)
|
|
return mo.Err[struct{}](fmt.Errorf("failed to write SSH port file: %w", err))
|
|
}
|
|
|
|
// 7. Wait for SSH
|
|
if err := waitForSSH(sshPort, 120*time.Second); err != nil {
|
|
_ = cmd.Process.Kill()
|
|
_ = os.Remove(config.PIDFile)
|
|
_ = os.Remove(config.SSHPortFile)
|
|
return mo.Err[struct{}](fmt.Errorf("VM started but SSH not available: %w", err))
|
|
}
|
|
|
|
return mo.Ok(struct{}{})
|
|
}
|
|
|
|
// Stop gracefully shuts down the VM.
|
|
// Sequence:
|
|
// 1. Read PID from file
|
|
// 2. Send SIGTERM to the process
|
|
// 3. Wait up to 30 seconds for graceful shutdown (poll every second)
|
|
// 4. If still running, send SIGKILL
|
|
// 5. Clean up PID and port files
|
|
//
|
|
// Returns success even if VM is not running (idempotent).
|
|
func Stop() mo.Result[struct{}] {
|
|
// 1. Read PID file
|
|
pidBytes, err := os.ReadFile(config.PIDFile)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
// Not running
|
|
return mo.Ok(struct{}{})
|
|
}
|
|
return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err))
|
|
}
|
|
|
|
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
|
|
if err != nil {
|
|
return mo.Err[struct{}](fmt.Errorf("invalid PID in file: %w", err))
|
|
}
|
|
|
|
// Check if process exists
|
|
process, err := os.FindProcess(pid)
|
|
if err != nil {
|
|
// Process doesn't exist, clean up
|
|
cleanupStateFiles()
|
|
return mo.Ok(struct{}{})
|
|
}
|
|
|
|
// 2. Send SIGTERM for graceful shutdown
|
|
if err := process.Signal(syscall.SIGTERM); err != nil {
|
|
// Process already gone
|
|
cleanupStateFiles()
|
|
return mo.Ok(struct{}{})
|
|
}
|
|
|
|
// 3. Wait up to 30 seconds for process to exit (poll every second)
|
|
for i := 0; i < 30; i++ {
|
|
time.Sleep(1 * time.Second)
|
|
|
|
// Check if process still exists by sending signal 0
|
|
if err := process.Signal(syscall.Signal(0)); err != nil {
|
|
// Process no longer exists
|
|
cleanupStateFiles()
|
|
return mo.Ok(struct{}{})
|
|
}
|
|
}
|
|
|
|
// 4. Timeout, force kill
|
|
_ = process.Signal(syscall.SIGKILL)
|
|
|
|
// Wait a moment for SIGKILL to take effect
|
|
time.Sleep(1 * time.Second)
|
|
|
|
// 5. Clean up state files
|
|
cleanupStateFiles()
|
|
|
|
return mo.Ok(struct{}{})
|
|
}
|
|
|
|
// cleanupStateFiles removes all VM state files
|
|
func cleanupStateFiles() {
|
|
_ = os.Remove(config.PIDFile)
|
|
_ = os.Remove(config.SSHPortFile)
|
|
_ = os.Remove(config.QMPSocket)
|
|
}
|
|
|
|
// Status returns the current VM status (running, PID, SSH port).
|
|
func Status() mo.Result[VMStatus] {
|
|
status := VMStatus{
|
|
Running: false,
|
|
PID: 0,
|
|
SSHPort: 0,
|
|
}
|
|
|
|
if !IsRunning() {
|
|
return mo.Ok(status)
|
|
}
|
|
|
|
// Read PID
|
|
pidBytes, err := os.ReadFile(config.PIDFile)
|
|
if err != nil {
|
|
return mo.Err[VMStatus](fmt.Errorf("failed to read PID file: %w", err))
|
|
}
|
|
|
|
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
|
|
if err != nil {
|
|
return mo.Err[VMStatus](fmt.Errorf("invalid PID in file: %w", err))
|
|
}
|
|
|
|
// Read SSH port
|
|
portBytes, err := os.ReadFile(config.SSHPortFile)
|
|
if err != nil {
|
|
return mo.Err[VMStatus](fmt.Errorf("failed to read SSH port file: %w", err))
|
|
}
|
|
|
|
sshPort, err := strconv.Atoi(strings.TrimSpace(string(portBytes)))
|
|
if err != nil {
|
|
return mo.Err[VMStatus](fmt.Errorf("invalid SSH port in file: %w", err))
|
|
}
|
|
|
|
status.Running = true
|
|
status.PID = pid
|
|
status.SSHPort = sshPort
|
|
|
|
return mo.Ok(status)
|
|
}
|
|
|
|
// Reset stops the VM and deletes the overlay image.
|
|
// This returns the VM to a fresh state based on the base image.
|
|
func Reset() mo.Result[struct{}] {
|
|
// Stop VM if running
|
|
stopResult := Stop()
|
|
if stopResult.IsError() {
|
|
return mo.Err[struct{}](fmt.Errorf("failed to stop VM: %w", stopResult.Error()))
|
|
}
|
|
|
|
// Delete overlay image
|
|
if err := os.Remove(config.Overlay); err != nil && !os.IsNotExist(err) {
|
|
return mo.Err[struct{}](fmt.Errorf("failed to delete overlay: %w", err))
|
|
}
|
|
|
|
return mo.Ok(struct{}{})
|
|
}
|
|
|
|
// IsRunning performs a quick check if the VM is running by checking
|
|
// the PID file and verifying the process exists.
|
|
func IsRunning() bool {
|
|
pidBytes, err := os.ReadFile(config.PIDFile)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
// Check if process exists by sending signal 0
|
|
process, err := os.FindProcess(pid)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
err = process.Signal(syscall.Signal(0))
|
|
return err == nil
|
|
}
|
|
|
|
func buildQEMUArgs(cfg *config.Config, sshPort int, mounts []Mount) []string {
|
|
// Boot directly from the qcow2 disk image (has GRUB installed)
|
|
// Do NOT use -kernel/-initrd - that's for NixOS VM runner which requires special 9p mounts
|
|
args := []string{
|
|
"-machine", "q35",
|
|
"-accel", "kvm",
|
|
"-cpu", "host",
|
|
"-m", cfg.VM.Memory,
|
|
"-smp", strconv.Itoa(cfg.VM.CPUs),
|
|
"-display", "none",
|
|
"-daemonize",
|
|
"-pidfile", config.PIDFile,
|
|
"-drive", fmt.Sprintf("file=%s,if=virtio,format=qcow2", config.Overlay),
|
|
"-netdev", fmt.Sprintf("user,id=n0,hostfwd=tcp::%d-:22", sshPort),
|
|
"-device", "virtio-net-pci,netdev=n0",
|
|
"-serial", fmt.Sprintf("file:%s", config.SerialLog),
|
|
}
|
|
|
|
// Add 9p mounts for cache directories and workspaces
|
|
for _, mount := range mounts {
|
|
args = append(args,
|
|
"-virtfs", fmt.Sprintf("local,path=%s,mount_tag=%s,security_model=mapped-xattr,id=%s",
|
|
mount.HostPath, mount.Tag, mount.Tag),
|
|
)
|
|
}
|
|
|
|
return args
|
|
}
|
|
|
|
// findAvailablePort finds an available TCP port starting from the given base port.
|
|
func findAvailablePort(basePort int) (int, error) {
|
|
const maxAttempts = 100
|
|
|
|
for i := 0; i < maxAttempts; i++ {
|
|
port := basePort + i
|
|
|
|
cmd := exec.Command("nc", "-z", "localhost", strconv.Itoa(port))
|
|
if err := cmd.Run(); err != nil {
|
|
return port, nil
|
|
}
|
|
}
|
|
|
|
return 0, fmt.Errorf("could not find available port after %d attempts", maxAttempts)
|
|
}
|
|
|
|
// waitForSSH waits for SSH to become available on the given port.
|
|
// Uses sshpass with password 'root' to test connection.
|
|
func waitForSSH(port int, timeout time.Duration) error {
|
|
deadline := time.Now().Add(timeout)
|
|
|
|
for time.Now().Before(deadline) {
|
|
cmd := exec.Command("sshpass", "-p", "root",
|
|
"ssh",
|
|
"-o", "StrictHostKeyChecking=no",
|
|
"-o", "UserKnownHostsFile=/dev/null",
|
|
"-o", "ConnectTimeout=1",
|
|
"-p", strconv.Itoa(port),
|
|
"root@localhost",
|
|
"exit 0")
|
|
|
|
if err := cmd.Run(); err == nil {
|
|
return nil
|
|
}
|
|
|
|
time.Sleep(1 * time.Second)
|
|
}
|
|
|
|
return fmt.Errorf("SSH did not become available within %v", timeout)
|
|
}
|