Add qvm doctor command to diagnose and fix common issues

This commit is contained in:
Joshua Bell 2026-01-27 11:54:26 -06:00
parent 2aec01b3b2
commit eb469f1cd8
8 changed files with 660 additions and 170 deletions

280
cmd/qvm/doctor.go Normal file
View file

@ -0,0 +1,280 @@
package main
import (
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"

	"github.com/spf13/cobra"

	"qvm/internal/config"
	"qvm/internal/vm"
	"qvm/internal/workspace"
)
// doctorCmd implements `qvm doctor`. It runs a fixed sequence of diagnostic
// checks, printing one status line per check, and then prints a numbered
// summary in which every issue is followed by its own remediation hint.
// When the --fix flag is set and at least one issue was found, applyFixes
// is invoked after the summary.
var doctorCmd = &cobra.Command{
	Use:   "doctor",
	Short: "Diagnose and fix common QVM issues",
	Long: `Runs diagnostic checks on QVM configuration and state.
Checks for:
- KVM availability and permissions
- Required binaries (qemu-system-x86_64, virtiofsd, sshpass, nc, qemu-img)
- Base image presence
- Stale state files (orphaned PID files, sockets)
- VM process state consistency
If issues are found, provides remediation steps.`,
	Run: func(cmd *cobra.Command, args []string) {
		// finding keeps an issue and its fix together. Tracking them as two
		// parallel slices went out of step whenever a single check reported
		// several issues but only one fix, so later fixes were printed under
		// the wrong issue.
		type finding struct {
			issue string
			fix   string
		}
		var findings []finding
		report := func(issue, fix string) {
			findings = append(findings, finding{issue: issue, fix: fix})
		}

		fmt.Println("QVM Doctor - Diagnosing issues...")
		fmt.Println()

		// Check 1: KVM availability, then read/write access to the device.
		fmt.Print("Checking KVM availability... ")
		if _, err := os.Stat("/dev/kvm"); err != nil {
			fmt.Println("FAIL")
			report("KVM not available (/dev/kvm not found)",
				"Ensure KVM is enabled in BIOS and kvm module is loaded: modprobe kvm_intel (or kvm_amd)")
		} else if f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0); err != nil {
			fmt.Println("FAIL")
			report(fmt.Sprintf("Cannot access /dev/kvm: %v", err),
				"Add your user to the kvm group: sudo usermod -aG kvm $USER (then logout/login)")
		} else {
			// The handle was opened only to prove access; nothing is read.
			f.Close()
			fmt.Println("OK")
		}

		// Check 2: Required binaries must be resolvable through PATH.
		requiredBinaries := []string{
			"qemu-system-x86_64",
			"virtiofsd",
			"sshpass",
			"nc",
			"qemu-img",
		}
		for _, bin := range requiredBinaries {
			fmt.Printf("Checking for %s... ", bin)
			if _, err := exec.LookPath(bin); err != nil {
				fmt.Println("MISSING")
				report(fmt.Sprintf("Required binary not found: %s", bin),
					fmt.Sprintf("Install %s via your package manager or nix", bin))
				continue
			}
			fmt.Println("OK")
		}

		// Check 3: Base image
		fmt.Print("Checking base image... ")
		if _, err := os.Stat(config.BaseImage); err != nil {
			fmt.Println("MISSING")
			report("Base image not found at "+config.BaseImage,
				"Run 'qvm rebuild' to build the base image")
		} else {
			fmt.Println("OK")
		}

		// Check 4: State file consistency. Each reported issue gets the same
		// fix hint so the summary stays aligned.
		fmt.Print("Checking state files... ")
		if stateIssues := checkStateFiles(); len(stateIssues) > 0 {
			fmt.Println("ISSUES FOUND")
			for _, issue := range stateIssues {
				report(issue, "Run 'qvm doctor --fix' to clean up stale state files")
			}
		} else {
			fmt.Println("OK")
		}

		// Check 5: virtiofsd sockets, again one fix hint per issue.
		fmt.Print("Checking virtiofsd sockets... ")
		if socketIssues := checkVirtiofsdSockets(); len(socketIssues) > 0 {
			fmt.Println("ISSUES FOUND")
			for _, issue := range socketIssues {
				report(issue, "Stale sockets will be cleaned up on next start")
			}
		} else {
			fmt.Println("OK")
		}

		// Check 6: Workspace registry. Missing host paths only produce a
		// warning on the status line; they are not added to the summary.
		fmt.Print("Checking workspace registry... ")
		regResult := workspace.Load(config.WorkspacesFile)
		if regResult.IsError() {
			fmt.Println("ERROR")
			report(fmt.Sprintf("Cannot load workspace registry: %v", regResult.Error()),
				"The registry will be recreated on next 'qvm run'")
		} else {
			reg := regResult.MustGet()
			workspaces := reg.List()
			invalidCount := 0
			for _, ws := range workspaces {
				if _, err := os.Stat(ws.HostPath); err != nil {
					invalidCount++
				}
			}
			if invalidCount > 0 {
				fmt.Printf("WARNING (%d workspaces with missing host paths)\n", invalidCount)
			} else {
				fmt.Printf("OK (%d workspaces registered)\n", len(workspaces))
			}
		}
		fmt.Println()

		// Summary
		if len(findings) == 0 {
			fmt.Println("All checks passed!")
			return
		}
		fmt.Printf("Found %d issue(s):\n", len(findings))
		fmt.Println()
		for i, f := range findings {
			fmt.Printf(" %d. %s\n", i+1, f.issue)
			fmt.Printf(" Fix: %s\n", f.fix)
		}
		fmt.Println()

		// The flag is registered in init, so a lookup error is not expected;
		// on error the zero value (false) leaves the command in report-only mode.
		fixFlag, _ := cmd.Flags().GetBool("fix")
		if fixFlag {
			fmt.Println("Applying automatic fixes...")
			applyFixes()
		} else {
			fmt.Println("Run 'qvm doctor --fix' to apply automatic fixes where possible.")
		}
	},
}
// init registers the flags accepted by the doctor command.
func init() {
	flags := doctorCmd.Flags()
	// --fix defaults to false, so a plain `qvm doctor` only reports.
	flags.Bool("fix", false, "Attempt to automatically fix issues")
}
// checkStateFiles verifies that PID files correspond to running processes
func checkStateFiles() []string {
var issues []string
// Check if PID file exists but process doesn't
pidBytes, err := os.ReadFile(config.PIDFile)
if err == nil {
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
if err == nil {
if !vm.IsRunning() {
issues = append(issues, fmt.Sprintf("Stale PID file: %s (process %d not running)", config.PIDFile, pid))
}
} else {
issues = append(issues, fmt.Sprintf("Invalid PID file: %s", config.PIDFile))
}
}
// Check for orphaned QMP socket
if _, err := os.Stat(config.QMPSocket); err == nil && !vm.IsRunning() {
issues = append(issues, fmt.Sprintf("Orphaned QMP socket: %s", config.QMPSocket))
}
// Check for orphaned SSH port file
if _, err := os.Stat(config.SSHPortFile); err == nil && !vm.IsRunning() {
issues = append(issues, fmt.Sprintf("Orphaned SSH port file: %s", config.SSHPortFile))
}
return issues
}
// checkVirtiofsdSockets scans config.StateDir for *.sock files, skipping the
// QMP socket, and returns one message for every socket that is not backed by
// a live process.
//
// For a socket named <name>.sock the owning PID is read from
// virtiofsd-<name>.pid in the same directory. A socket is reported when that
// PID file is missing, when its content is not an integer, or when the
// process it names no longer exists. A PID file that exists but cannot be
// read is skipped without a message. If the glob itself fails, no issues are
// returned.
func checkVirtiofsdSockets() []string {
	var issues []string

	pattern := filepath.Join(config.StateDir, "*.sock")
	sockets, err := filepath.Glob(pattern)
	if err != nil {
		return issues
	}

	for _, sock := range sockets {
		// Skip QMP socket
		if sock == config.QMPSocket {
			continue
		}

		// Locate the PID file that corresponds to this socket.
		baseName := strings.TrimSuffix(filepath.Base(sock), ".sock")
		pidFile := filepath.Join(config.StateDir, fmt.Sprintf("virtiofsd-%s.pid", baseName))
		if _, err := os.Stat(pidFile); os.IsNotExist(err) {
			issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (no PID file)", sock))
			continue
		}

		pidBytes, err := os.ReadFile(pidFile)
		if err != nil {
			continue
		}
		pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
		if err != nil {
			issues = append(issues, fmt.Sprintf("Invalid virtiofsd PID file: %s", pidFile))
			continue
		}

		// Check if process is running
		process, err := os.FindProcess(pid)
		if err != nil {
			issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (process lookup failed)", sock))
			continue
		}

		// Signal 0 performs the existence and permission checks without
		// delivering a signal. A nil os.Signal must not be used here:
		// Process.Signal rejects it as an unsupported signal type, which
		// made every socket look orphaned. EPERM means the process exists
		// but belongs to another user, so it still counts as running.
		if err := process.Signal(syscall.Signal(0)); err != nil && !errors.Is(err, syscall.EPERM) {
			issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (process %d not running)", sock, pid))
		}
	}

	return issues
}
// applyFixes deletes the VM state files, every *.sock file and every
// virtiofsd-*.pid file in config.StateDir, printing a line for each file
// that was actually removed. Removal is best-effort: a file that cannot be
// removed is silently left in place. Nothing is touched while
// vm.IsRunning() reports true.
func applyFixes() {
	if vm.IsRunning() {
		fmt.Println(" VM is running, cannot clean up state files. Stop VM first with 'qvm stop'.")
		return
	}

	fmt.Println(" Cleaning up stale state files...")

	// discard removes path and announces it only on success.
	discard := func(path string) {
		if os.Remove(path) == nil {
			fmt.Printf(" Removed: %s\n", path)
		}
	}

	// Fixed state files first, in the same order as they are checked.
	for _, path := range []string{config.PIDFile, config.SSHPortFile, config.QMPSocket} {
		discard(path)
	}

	// Then the sockets, followed by the virtiofsd PID files.
	for _, glob := range []string{"*.sock", "virtiofsd-*.pid"} {
		// Glob errors are ignored as part of the best-effort cleanup.
		matches, _ := filepath.Glob(filepath.Join(config.StateDir, glob))
		for _, match := range matches {
			discard(match)
		}
	}

	fmt.Println(" Done!")
}

View file

@ -28,6 +28,7 @@ func init() {
rootCmd.AddCommand(rebuildCmd)
rootCmd.AddCommand(resetCmd)
rootCmd.AddCommand(cleanCmd)
rootCmd.AddCommand(doctorCmd)
}
func main() {

View file

@ -23,7 +23,10 @@ var runCmd = &cobra.Command{
The current directory is automatically registered as a workspace
and mounted into the VM. The command runs in the mounted workspace.
If no command is provided, starts an interactive zsh shell.`,
If no command is provided, starts an interactive zsh shell.
Unlike previous versions, new workspaces can be hot-mounted into
a running VM without requiring a restart.`,
Args: cobra.ArbitraryArgs,
Run: func(cmd *cobra.Command, args []string) {
// Default to zsh if no command provided
@ -72,32 +75,24 @@ If no command is provided, starts an interactive zsh shell.`,
os.Exit(1)
}
} else {
statusResult := vm.Status()
if statusResult.IsError() {
logging.Error(statusResult.Error().Error())
os.Exit(1)
}
status := statusResult.MustGet()
// Check if workspace is mounted, hot-mount if not
if !vm.IsWorkspaceMounted(ws) {
logging.Info(fmt.Sprintf("Hot-mounting workspace %s...", ws.Hash))
hotMountResult := vm.HotMountWorkspace(ws)
if hotMountResult.IsError() {
logging.Error(fmt.Sprintf("Failed to hot-mount workspace: %v", hotMountResult.Error()))
logging.Info("Falling back to VM restart...")
checkCmd := exec.Command("sshpass", "-p", "root", "ssh",
"-o", "StrictHostKeyChecking=no",
"-o", "UserKnownHostsFile=/dev/null",
"-o", "LogLevel=ERROR",
"-p", strconv.Itoa(status.SSHPort),
"root@localhost",
fmt.Sprintf("test -d /sys/bus/virtio/drivers/9pnet_virtio/*/mount_tag && grep -q 'ws_%s' /sys/bus/virtio/drivers/9pnet_virtio/*/mount_tag 2>/dev/null", ws.Hash))
if checkCmd.Run() != nil {
logging.Info("Workspace not available in running VM, restarting VM...")
stopResult := vm.Stop()
if stopResult.IsError() {
logging.Error(stopResult.Error().Error())
os.Exit(1)
}
startResult := vm.Start(cfg, reg)
if startResult.IsError() {
logging.Error(startResult.Error().Error())
os.Exit(1)
stopResult := vm.Stop()
if stopResult.IsError() {
logging.Error(stopResult.Error().Error())
os.Exit(1)
}
startResult := vm.Start(cfg, reg)
if startResult.IsError() {
logging.Error(startResult.Error().Error())
os.Exit(1)
}
}
}
}
@ -109,8 +104,11 @@ If no command is provided, starts an interactive zsh shell.`,
}
status := statusResult.MustGet()
remoteCmd := fmt.Sprintf("mkdir -p '%s' && (mountpoint -q '%s' || mount -t 9p ws_%s '%s' -o trans=virtio,version=9p2000.L,msize=104857600) && cd '%s' && %s",
ws.GuestPath, ws.GuestPath, ws.Hash, ws.GuestPath, ws.GuestPath, strings.Join(args, " "))
// Use virtiofs mount instead of 9p
// virtiofs mounts use the tag name directly
remoteCmd := fmt.Sprintf(
"mkdir -p '%s' && (mountpoint -q '%s' || mount -t virtiofs %s '%s') && cd '%s' && %s",
ws.GuestPath, ws.GuestPath, ws.MountTag, ws.GuestPath, ws.GuestPath, strings.Join(args, " "))
sshArgs := []string{
"-p", "root",