Replace Bash qvm scripts with Go CLI implementation

This commit is contained in:
Joshua Bell 2026-01-26 20:48:32 -06:00
parent ffb456707f
commit 2a6a333721
27 changed files with 2551 additions and 1702 deletions

379
internal/vm/lifecycle.go Normal file
View file

@ -0,0 +1,379 @@
package vm
import (
	"fmt"
	"net"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/samber/mo"
	"qvm/internal/config"
	"qvm/internal/logging"
	"qvm/internal/workspace"
)
// VMStatus is a snapshot of the VM's state as reported by Status.
type VMStatus struct {
	// Running reports whether the VM process was found alive.
	Running bool
	// PID is the process ID parsed from the PID file; 0 when not running.
	PID int
	// SSHPort is the host port parsed from the SSH port file; 0 when not running.
	SSHPort int
}
// Mount describes one host directory shared with the guest over 9p.
type Mount struct {
	// Tag is used as both the virtfs mount_tag and the device id.
	Tag string
	// HostPath is the directory on the host that is exported.
	HostPath string
}
// Start launches the VM with all configured mounts.
// Sequence:
//  1. Check if VM is already running (via PID file and process check)
//  2. Ensure all required directories exist, the base image is present, and
//     the overlay image exists (it is created with qemu-img when missing)
//  3. Build mount list (cache mounts + workspace mounts from registry)
//  4. Find available SSH port
//  5. Launch QEMU directly; it daemonizes and writes its own PID file
//  6. Write the SSH port to its state file
//  7. Wait for SSH to become available (120 second timeout)
//
// If step 6 or 7 fails, the daemonized QEMU process is killed using the PID
// it recorded and the state files written so far are removed.
//
// Returns error if any step fails.
func Start(cfg *config.Config, reg *workspace.Registry) mo.Result[struct{}] {
	// 1. Check if already running
	if IsRunning() {
		return mo.Err[struct{}](fmt.Errorf("VM is already running"))
	}

	// 2. Ensure directories exist
	if err := config.EnsureDirs(); err != nil {
		return mo.Err[struct{}](fmt.Errorf("failed to create directories: %w", err))
	}

	// 2a. The base image backs the overlay, so it must exist.
	if _, err := os.Stat(config.BaseImage); os.IsNotExist(err) {
		return mo.Err[struct{}](fmt.Errorf("base image not found at %s - run 'qvm rebuild' first", config.BaseImage))
	}

	// 2b. Create overlay if it doesn't exist (backed by base image).
	// The base image was already verified in 2a.
	if _, err := os.Stat(config.Overlay); os.IsNotExist(err) {
		logging.Info("Creating overlay image backed by base image...")
		create := exec.Command("qemu-img", "create", "-f", "qcow2",
			"-F", "qcow2", "-b", config.BaseImage, config.Overlay)
		if output, err := create.CombinedOutput(); err != nil {
			return mo.Err[struct{}](fmt.Errorf("failed to create overlay: %s: %w", string(output), err))
		}
	}

	// 3. Build mount list (for 9p virtfs)
	mounts := []Mount{
		{Tag: "cargo_home", HostPath: config.CargoHome},
		{Tag: "cargo_target", HostPath: config.CargoTarget},
		{Tag: "pnpm_store", HostPath: config.PnpmStore},
		{Tag: "sccache", HostPath: config.Sccache},
	}

	// Add opencode config mount if directory exists
	if _, err := os.Stat(config.HostOpencodeConfig); err == nil {
		mounts = append(mounts, Mount{
			Tag:      "opencode_config",
			HostPath: config.HostOpencodeConfig,
		})
	}

	// Add workspace mounts from registry
	for _, ws := range reg.List() {
		mounts = append(mounts, Mount{
			Tag:      ws.MountTag,
			HostPath: ws.HostPath,
		})
	}

	// 4. Find available SSH port
	sshPort, err := findAvailablePort(2222)
	if err != nil {
		return mo.Err[struct{}](fmt.Errorf("failed to find available SSH port: %w", err))
	}

	// 5. Launch QEMU. The argument list contains -daemonize, so the process
	// started here is only a launcher: Run waits for it to exit while the
	// actual VM keeps running in the background under a different PID.
	args := buildQEMUArgs(cfg, sshPort, mounts)
	if err := exec.Command("qemu-system-x86_64", args...).Run(); err != nil {
		return mo.Err[struct{}](fmt.Errorf("failed to start QEMU: %w", err))
	}

	logging.Info("Waiting for VM to daemonize...")
	pidFileReady := false
	for i := 0; i < 10; i++ {
		time.Sleep(500 * time.Millisecond)
		if _, err := os.Stat(config.PIDFile); err == nil {
			pidFileReady = true
			break
		}
	}
	if !pidFileReady {
		return mo.Err[struct{}](fmt.Errorf("QEMU did not create PID file after 5 seconds"))
	}

	pidBytes, err := os.ReadFile(config.PIDFile)
	if err != nil {
		return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", err))
	}
	pid := strings.TrimSpace(string(pidBytes))
	logging.Info("VM started with PID " + pid)

	// 6. Record the SSH port
	if err := os.WriteFile(config.SSHPortFile, []byte(strconv.Itoa(sshPort)), 0644); err != nil {
		killStartedVM(pid)
		_ = os.Remove(config.PIDFile)
		return mo.Err[struct{}](fmt.Errorf("failed to write SSH port file: %w", err))
	}

	// 7. Wait for SSH
	if err := waitForSSH(sshPort, 120*time.Second); err != nil {
		// The launcher process has already exited, so the VM has to be
		// killed through the PID that QEMU wrote to the PID file.
		killStartedVM(pid)
		_ = os.Remove(config.PIDFile)
		_ = os.Remove(config.SSHPortFile)
		return mo.Err[struct{}](fmt.Errorf("VM started but SSH not available: %w", err))
	}

	return mo.Ok(struct{}{})
}

// killStartedVM force-kills the process whose PID is given as text (as read
// from the PID file). It is best-effort: callers are already on a failure
// path, so parse, lookup and kill errors are deliberately ignored.
func killStartedVM(pidText string) {
	pid, err := strconv.Atoi(pidText)
	if err != nil {
		return
	}
	if process, err := os.FindProcess(pid); err == nil {
		_ = process.Kill()
	}
}
// Stop shuts the VM down, preferring a graceful exit.
//
// It reads the PID from the PID file, sends SIGTERM, and then polls once per
// second (up to 30 times) with signal 0 until the process stops responding.
// If the process is still there after that, SIGKILL is sent and one more
// second is allowed to pass. State files are removed on every path that gets
// past PID parsing.
//
// A missing PID file is treated as "not running" and reported as success, so
// calling Stop repeatedly is safe. An unreadable or unparsable PID file is
// reported as an error.
func Stop() mo.Result[struct{}] {
	done := mo.Ok(struct{}{})

	raw, readErr := os.ReadFile(config.PIDFile)
	switch {
	case readErr == nil:
		// PID file present; continue below.
	case os.IsNotExist(readErr):
		// No PID file means there is nothing to stop.
		return done
	default:
		return mo.Err[struct{}](fmt.Errorf("failed to read PID file: %w", readErr))
	}

	pid, parseErr := strconv.Atoi(strings.TrimSpace(string(raw)))
	if parseErr != nil {
		return mo.Err[struct{}](fmt.Errorf("invalid PID in file: %w", parseErr))
	}

	// Every remaining path ends with state-file cleanup followed by success.
	defer cleanupStateFiles()

	proc, findErr := os.FindProcess(pid)
	if findErr != nil {
		return done
	}

	// Ask for a graceful shutdown; a failure here is taken to mean the
	// process is already gone.
	if proc.Signal(syscall.SIGTERM) != nil {
		return done
	}

	// Poll with signal 0, which only checks that the process can be signalled.
	const graceSeconds = 30
	for remaining := graceSeconds; remaining > 0; remaining-- {
		time.Sleep(time.Second)
		if proc.Signal(syscall.Signal(0)) != nil {
			return done
		}
	}

	// Grace period exhausted: force kill and give it a moment to take effect.
	_ = proc.Signal(syscall.SIGKILL)
	time.Sleep(time.Second)
	return done
}
// cleanupStateFiles deletes the PID file, the SSH port file and the QMP
// socket. Removal errors (including "file does not exist") are ignored.
func cleanupStateFiles() {
	for _, path := range [...]string{config.PIDFile, config.SSHPortFile, config.QMPSocket} {
		_ = os.Remove(path)
	}
}
// Status reports whether the VM is running and, if so, its PID and SSH port.
//
// When IsRunning is false the zero VMStatus is returned. Otherwise the PID
// and SSH port are read from their state files; a file that cannot be read
// or does not contain an integer yields an error result.
func Status() mo.Result[VMStatus] {
	if !IsRunning() {
		return mo.Ok(VMStatus{})
	}

	rawPID, err := os.ReadFile(config.PIDFile)
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("failed to read PID file: %w", err))
	}
	pid, err := strconv.Atoi(strings.TrimSpace(string(rawPID)))
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("invalid PID in file: %w", err))
	}

	rawPort, err := os.ReadFile(config.SSHPortFile)
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("failed to read SSH port file: %w", err))
	}
	port, err := strconv.Atoi(strings.TrimSpace(string(rawPort)))
	if err != nil {
		return mo.Err[VMStatus](fmt.Errorf("invalid SSH port in file: %w", err))
	}

	return mo.Ok(VMStatus{Running: true, PID: pid, SSHPort: port})
}
// Reset stops the VM and then removes the overlay image, so the next start
// recreates the overlay from the base image. An overlay that is already
// absent is not an error.
func Reset() mo.Result[struct{}] {
	if stopped := Stop(); stopped.IsError() {
		return mo.Err[struct{}](fmt.Errorf("failed to stop VM: %w", stopped.Error()))
	}

	switch err := os.Remove(config.Overlay); {
	case err == nil, os.IsNotExist(err):
		return mo.Ok(struct{}{})
	default:
		return mo.Err[struct{}](fmt.Errorf("failed to delete overlay: %w", err))
	}
}
// IsRunning reports whether the PID file names a process that can currently
// be signalled. Any failure along the way (PID file unreadable, contents not
// an integer, process lookup or signal 0 failing) yields false.
func IsRunning() bool {
	raw, err := os.ReadFile(config.PIDFile)
	if err != nil {
		return false
	}

	pid, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil {
		return false
	}

	// Signal 0 delivers nothing; it only checks that the process can be signalled.
	proc, err := os.FindProcess(pid)
	return err == nil && proc.Signal(syscall.Signal(0)) == nil
}
// buildQEMUArgs assembles the qemu-system-x86_64 command line.
//
// The VM boots from the overlay qcow2 image, runs headless and daemonized,
// writes its PID to config.PIDFile, forwards host TCP port sshPort to guest
// port 22 via user-mode networking, logs the serial console to
// config.SerialLog, and exports every entry of mounts as a 9p virtfs share
// whose mount_tag and id are both the mount's Tag.
//
// File paths embedded in comma-separated option strings have literal commas
// doubled, as QEMU requires, so a path containing "," cannot be misread as
// additional sub-options.
func buildQEMUArgs(cfg *config.Config, sshPort int, mounts []Mount) []string {
	// QEMU splits option values on commas; ",," stands for a literal comma.
	escape := func(value string) string {
		return strings.ReplaceAll(value, ",", ",,")
	}

	// Boot directly from the qcow2 disk image (has GRUB installed)
	// Do NOT use -kernel/-initrd - that's for NixOS VM runner which requires special 9p mounts
	args := []string{
		"-machine", "q35",
		"-accel", "kvm",
		"-cpu", "host",
		"-m", cfg.VM.Memory,
		"-smp", strconv.Itoa(cfg.VM.CPUs),
		"-display", "none",
		"-daemonize",
		"-pidfile", config.PIDFile,
		"-drive", fmt.Sprintf("file=%s,if=virtio,format=qcow2", escape(config.Overlay)),
		"-netdev", fmt.Sprintf("user,id=n0,hostfwd=tcp::%d-:22", sshPort),
		"-device", "virtio-net-pci,netdev=n0",
		"-serial", fmt.Sprintf("file:%s", config.SerialLog),
	}

	// Add 9p mounts for cache directories and workspaces
	for _, mount := range mounts {
		args = append(args,
			"-virtfs", fmt.Sprintf("local,path=%s,mount_tag=%s,security_model=mapped-xattr,id=%s",
				escape(mount.HostPath), mount.Tag, mount.Tag),
		)
	}

	return args
}
// findAvailablePort returns the first TCP port in [basePort, basePort+100)
// that can be bound on the IPv4 wildcard address, or an error if none can.
//
// Availability is tested by actually binding the port and releasing it
// again, so a port is reported only if a listener could be created on it.
// Candidates outside the valid range 1-65535 are never returned.
//
// The check is inherently racy: another process may take the port between
// this probe and the moment the caller binds it.
func findAvailablePort(basePort int) (int, error) {
	const (
		maxAttempts = 100
		maxPort     = 65535
	)

	for i := 0; i < maxAttempts; i++ {
		port := basePort + i
		if port > maxPort {
			// Candidates only grow, so nothing further can be valid.
			break
		}
		if port < 1 {
			continue
		}

		probe, err := net.Listen("tcp4", ":"+strconv.Itoa(port))
		if err != nil {
			// In use, or not bindable by this user; try the next one.
			continue
		}
		_ = probe.Close() // best-effort release; the port is already known to be free
		return port, nil
	}

	return 0, fmt.Errorf("could not find available port after %d attempts", maxAttempts)
}
// waitForSSH polls until an SSH login to root@localhost on the given port
// succeeds, or until timeout has elapsed.
//
// Each probe runs `ssh ... exit 0` through sshpass with the password "root",
// with host key checking disabled and a one second connect timeout. Failed
// probes are followed by a one second pause. Returns nil on the first
// successful probe, otherwise an error naming the timeout.
func waitForSSH(port int, timeout time.Duration) error {
	probeArgs := []string{
		"-p", "root",
		"ssh",
		"-o", "StrictHostKeyChecking=no",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "ConnectTimeout=1",
		"-p", strconv.Itoa(port),
		"root@localhost",
		"exit 0",
	}

	// The post statement supplies the pause between failed probes.
	for cutoff := time.Now().Add(timeout); time.Now().Before(cutoff); time.Sleep(1 * time.Second) {
		if exec.Command("sshpass", probeArgs...).Run() == nil {
			return nil
		}
	}

	return fmt.Errorf("SSH did not become available within %v", timeout)
}