Add qvm doctor command to diagnose and fix common issues
This commit is contained in:
parent
2aec01b3b2
commit
eb469f1cd8
8 changed files with 660 additions and 170 deletions
280
cmd/qvm/doctor.go
Normal file
280
cmd/qvm/doctor.go
Normal file
|
|
@ -0,0 +1,280 @@
|
|||
package main
|
||||
|
||||
import (
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"qvm/internal/config"
	"qvm/internal/vm"
	"qvm/internal/workspace"
	"strconv"
	"strings"
	"syscall"

	"github.com/spf13/cobra"
)
|
||||
|
||||
var doctorCmd = &cobra.Command{
|
||||
Use: "doctor",
|
||||
Short: "Diagnose and fix common QVM issues",
|
||||
Long: `Runs diagnostic checks on QVM configuration and state.
|
||||
|
||||
Checks for:
|
||||
- KVM availability and permissions
|
||||
- Required binaries (qemu-system-x86_64, virtiofsd, sshpass, nc)
|
||||
- Base image presence
|
||||
- Stale state files (orphaned PID files, sockets)
|
||||
- VM process state consistency
|
||||
|
||||
If issues are found, provides remediation steps.`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
var issues []string
|
||||
var fixes []string
|
||||
|
||||
fmt.Println("QVM Doctor - Diagnosing issues...")
|
||||
fmt.Println()
|
||||
|
||||
// Check 1: KVM availability
|
||||
fmt.Print("Checking KVM availability... ")
|
||||
if _, err := os.Stat("/dev/kvm"); err != nil {
|
||||
fmt.Println("FAIL")
|
||||
issues = append(issues, "KVM not available (/dev/kvm not found)")
|
||||
fixes = append(fixes, "Ensure KVM is enabled in BIOS and kvm module is loaded: modprobe kvm_intel (or kvm_amd)")
|
||||
} else {
|
||||
// Check KVM permissions
|
||||
f, err := os.OpenFile("/dev/kvm", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
fmt.Println("FAIL")
|
||||
issues = append(issues, fmt.Sprintf("Cannot access /dev/kvm: %v", err))
|
||||
fixes = append(fixes, "Add your user to the kvm group: sudo usermod -aG kvm $USER (then logout/login)")
|
||||
} else {
|
||||
f.Close()
|
||||
fmt.Println("OK")
|
||||
}
|
||||
}
|
||||
|
||||
// Check 2: Required binaries
|
||||
requiredBinaries := []string{
|
||||
"qemu-system-x86_64",
|
||||
"virtiofsd",
|
||||
"sshpass",
|
||||
"nc",
|
||||
"qemu-img",
|
||||
}
|
||||
|
||||
for _, bin := range requiredBinaries {
|
||||
fmt.Printf("Checking for %s... ", bin)
|
||||
if _, err := exec.LookPath(bin); err != nil {
|
||||
fmt.Println("MISSING")
|
||||
issues = append(issues, fmt.Sprintf("Required binary not found: %s", bin))
|
||||
fixes = append(fixes, fmt.Sprintf("Install %s via your package manager or nix", bin))
|
||||
} else {
|
||||
fmt.Println("OK")
|
||||
}
|
||||
}
|
||||
|
||||
// Check 3: Base image
|
||||
fmt.Print("Checking base image... ")
|
||||
if _, err := os.Stat(config.BaseImage); err != nil {
|
||||
fmt.Println("MISSING")
|
||||
issues = append(issues, "Base image not found at "+config.BaseImage)
|
||||
fixes = append(fixes, "Run 'qvm rebuild' to build the base image")
|
||||
} else {
|
||||
fmt.Println("OK")
|
||||
}
|
||||
|
||||
// Check 4: State file consistency
|
||||
fmt.Print("Checking state files... ")
|
||||
stateIssues := checkStateFiles()
|
||||
if len(stateIssues) > 0 {
|
||||
fmt.Println("ISSUES FOUND")
|
||||
issues = append(issues, stateIssues...)
|
||||
fixes = append(fixes, "Run 'qvm doctor --fix' to clean up stale state files")
|
||||
} else {
|
||||
fmt.Println("OK")
|
||||
}
|
||||
|
||||
// Check 5: virtiofsd sockets
|
||||
fmt.Print("Checking virtiofsd sockets... ")
|
||||
socketIssues := checkVirtiofsdSockets()
|
||||
if len(socketIssues) > 0 {
|
||||
fmt.Println("ISSUES FOUND")
|
||||
issues = append(issues, socketIssues...)
|
||||
fixes = append(fixes, "Stale sockets will be cleaned up on next start")
|
||||
} else {
|
||||
fmt.Println("OK")
|
||||
}
|
||||
|
||||
// Check 6: Workspace registry
|
||||
fmt.Print("Checking workspace registry... ")
|
||||
regResult := workspace.Load(config.WorkspacesFile)
|
||||
if regResult.IsError() {
|
||||
fmt.Println("ERROR")
|
||||
issues = append(issues, fmt.Sprintf("Cannot load workspace registry: %v", regResult.Error()))
|
||||
fixes = append(fixes, "The registry will be recreated on next 'qvm run'")
|
||||
} else {
|
||||
reg := regResult.MustGet()
|
||||
workspaces := reg.List()
|
||||
invalidCount := 0
|
||||
for _, ws := range workspaces {
|
||||
if _, err := os.Stat(ws.HostPath); err != nil {
|
||||
invalidCount++
|
||||
}
|
||||
}
|
||||
if invalidCount > 0 {
|
||||
fmt.Printf("WARNING (%d workspaces with missing host paths)\n", invalidCount)
|
||||
} else {
|
||||
fmt.Printf("OK (%d workspaces registered)\n", len(workspaces))
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println()
|
||||
|
||||
// Summary
|
||||
if len(issues) == 0 {
|
||||
fmt.Println("All checks passed!")
|
||||
} else {
|
||||
fmt.Printf("Found %d issue(s):\n", len(issues))
|
||||
fmt.Println()
|
||||
for i, issue := range issues {
|
||||
fmt.Printf(" %d. %s\n", i+1, issue)
|
||||
if i < len(fixes) {
|
||||
fmt.Printf(" Fix: %s\n", fixes[i])
|
||||
}
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
fixFlag, _ := cmd.Flags().GetBool("fix")
|
||||
if fixFlag {
|
||||
fmt.Println("Applying automatic fixes...")
|
||||
applyFixes()
|
||||
} else {
|
||||
fmt.Println("Run 'qvm doctor --fix' to apply automatic fixes where possible.")
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
doctorCmd.Flags().Bool("fix", false, "Attempt to automatically fix issues")
|
||||
}
|
||||
|
||||
// checkStateFiles verifies that PID files correspond to running processes
|
||||
func checkStateFiles() []string {
|
||||
var issues []string
|
||||
|
||||
// Check if PID file exists but process doesn't
|
||||
pidBytes, err := os.ReadFile(config.PIDFile)
|
||||
if err == nil {
|
||||
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
|
||||
if err == nil {
|
||||
if !vm.IsRunning() {
|
||||
issues = append(issues, fmt.Sprintf("Stale PID file: %s (process %d not running)", config.PIDFile, pid))
|
||||
}
|
||||
} else {
|
||||
issues = append(issues, fmt.Sprintf("Invalid PID file: %s", config.PIDFile))
|
||||
}
|
||||
}
|
||||
|
||||
// Check for orphaned QMP socket
|
||||
if _, err := os.Stat(config.QMPSocket); err == nil && !vm.IsRunning() {
|
||||
issues = append(issues, fmt.Sprintf("Orphaned QMP socket: %s", config.QMPSocket))
|
||||
}
|
||||
|
||||
// Check for orphaned SSH port file
|
||||
if _, err := os.Stat(config.SSHPortFile); err == nil && !vm.IsRunning() {
|
||||
issues = append(issues, fmt.Sprintf("Orphaned SSH port file: %s", config.SSHPortFile))
|
||||
}
|
||||
|
||||
return issues
|
||||
}
|
||||
|
||||
// checkVirtiofsdSockets looks for orphaned virtiofsd sockets
|
||||
func checkVirtiofsdSockets() []string {
|
||||
var issues []string
|
||||
|
||||
pattern := filepath.Join(config.StateDir, "*.sock")
|
||||
sockets, err := filepath.Glob(pattern)
|
||||
if err != nil {
|
||||
return issues
|
||||
}
|
||||
|
||||
for _, sock := range sockets {
|
||||
// Skip QMP socket
|
||||
if sock == config.QMPSocket {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if corresponding virtiofsd is running
|
||||
baseName := strings.TrimSuffix(filepath.Base(sock), ".sock")
|
||||
pidFile := filepath.Join(config.StateDir, fmt.Sprintf("virtiofsd-%s.pid", baseName))
|
||||
|
||||
if _, err := os.Stat(pidFile); os.IsNotExist(err) {
|
||||
issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (no PID file)", sock))
|
||||
continue
|
||||
}
|
||||
|
||||
pidBytes, err := os.ReadFile(pidFile)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes)))
|
||||
if err != nil {
|
||||
issues = append(issues, fmt.Sprintf("Invalid virtiofsd PID file: %s", pidFile))
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if process is running
|
||||
process, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (process lookup failed)", sock))
|
||||
continue
|
||||
}
|
||||
|
||||
if err := process.Signal(os.Signal(nil)); err != nil {
|
||||
issues = append(issues, fmt.Sprintf("Orphaned virtiofsd socket: %s (process %d not running)", sock, pid))
|
||||
}
|
||||
}
|
||||
|
||||
return issues
|
||||
}
|
||||
|
||||
// applyFixes attempts to clean up stale state files
|
||||
func applyFixes() {
|
||||
if !vm.IsRunning() {
|
||||
// Clean up all state files
|
||||
fmt.Println(" Cleaning up stale state files...")
|
||||
|
||||
if err := os.Remove(config.PIDFile); err == nil {
|
||||
fmt.Printf(" Removed: %s\n", config.PIDFile)
|
||||
}
|
||||
if err := os.Remove(config.SSHPortFile); err == nil {
|
||||
fmt.Printf(" Removed: %s\n", config.SSHPortFile)
|
||||
}
|
||||
if err := os.Remove(config.QMPSocket); err == nil {
|
||||
fmt.Printf(" Removed: %s\n", config.QMPSocket)
|
||||
}
|
||||
|
||||
// Clean up virtiofsd sockets and PID files
|
||||
pattern := filepath.Join(config.StateDir, "*.sock")
|
||||
sockets, _ := filepath.Glob(pattern)
|
||||
for _, sock := range sockets {
|
||||
if err := os.Remove(sock); err == nil {
|
||||
fmt.Printf(" Removed: %s\n", sock)
|
||||
}
|
||||
}
|
||||
|
||||
pattern = filepath.Join(config.StateDir, "virtiofsd-*.pid")
|
||||
pidFiles, _ := filepath.Glob(pattern)
|
||||
for _, pidFile := range pidFiles {
|
||||
if err := os.Remove(pidFile); err == nil {
|
||||
fmt.Printf(" Removed: %s\n", pidFile)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println(" Done!")
|
||||
} else {
|
||||
fmt.Println(" VM is running, cannot clean up state files. Stop VM first with 'qvm stop'.")
|
||||
}
|
||||
}
|
||||
|
|
@ -28,6 +28,7 @@ func init() {
|
|||
rootCmd.AddCommand(rebuildCmd)
|
||||
rootCmd.AddCommand(resetCmd)
|
||||
rootCmd.AddCommand(cleanCmd)
|
||||
rootCmd.AddCommand(doctorCmd)
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
|
|
|||
|
|
@ -23,7 +23,10 @@ var runCmd = &cobra.Command{
|
|||
The current directory is automatically registered as a workspace
|
||||
and mounted into the VM. The command runs in the mounted workspace.
|
||||
|
||||
If no command is provided, starts an interactive zsh shell.`,
|
||||
If no command is provided, starts an interactive zsh shell.
|
||||
|
||||
Unlike previous versions, new workspaces can be hot-mounted into
|
||||
a running VM without requiring a restart.`,
|
||||
Args: cobra.ArbitraryArgs,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
// Default to zsh if no command provided
|
||||
|
|
@ -72,32 +75,24 @@ If no command is provided, starts an interactive zsh shell.`,
|
|||
os.Exit(1)
|
||||
}
|
||||
} else {
|
||||
statusResult := vm.Status()
|
||||
if statusResult.IsError() {
|
||||
logging.Error(statusResult.Error().Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
status := statusResult.MustGet()
|
||||
// Check if workspace is mounted, hot-mount if not
|
||||
if !vm.IsWorkspaceMounted(ws) {
|
||||
logging.Info(fmt.Sprintf("Hot-mounting workspace %s...", ws.Hash))
|
||||
hotMountResult := vm.HotMountWorkspace(ws)
|
||||
if hotMountResult.IsError() {
|
||||
logging.Error(fmt.Sprintf("Failed to hot-mount workspace: %v", hotMountResult.Error()))
|
||||
logging.Info("Falling back to VM restart...")
|
||||
|
||||
checkCmd := exec.Command("sshpass", "-p", "root", "ssh",
|
||||
"-o", "StrictHostKeyChecking=no",
|
||||
"-o", "UserKnownHostsFile=/dev/null",
|
||||
"-o", "LogLevel=ERROR",
|
||||
"-p", strconv.Itoa(status.SSHPort),
|
||||
"root@localhost",
|
||||
fmt.Sprintf("test -d /sys/bus/virtio/drivers/9pnet_virtio/*/mount_tag && grep -q 'ws_%s' /sys/bus/virtio/drivers/9pnet_virtio/*/mount_tag 2>/dev/null", ws.Hash))
|
||||
|
||||
if checkCmd.Run() != nil {
|
||||
logging.Info("Workspace not available in running VM, restarting VM...")
|
||||
stopResult := vm.Stop()
|
||||
if stopResult.IsError() {
|
||||
logging.Error(stopResult.Error().Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
startResult := vm.Start(cfg, reg)
|
||||
if startResult.IsError() {
|
||||
logging.Error(startResult.Error().Error())
|
||||
os.Exit(1)
|
||||
stopResult := vm.Stop()
|
||||
if stopResult.IsError() {
|
||||
logging.Error(stopResult.Error().Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
startResult := vm.Start(cfg, reg)
|
||||
if startResult.IsError() {
|
||||
logging.Error(startResult.Error().Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -109,8 +104,11 @@ If no command is provided, starts an interactive zsh shell.`,
|
|||
}
|
||||
status := statusResult.MustGet()
|
||||
|
||||
remoteCmd := fmt.Sprintf("mkdir -p '%s' && (mountpoint -q '%s' || mount -t 9p ws_%s '%s' -o trans=virtio,version=9p2000.L,msize=104857600) && cd '%s' && %s",
|
||||
ws.GuestPath, ws.GuestPath, ws.Hash, ws.GuestPath, ws.GuestPath, strings.Join(args, " "))
|
||||
// Use virtiofs mount instead of 9p
|
||||
// virtiofs mounts use the tag name directly
|
||||
remoteCmd := fmt.Sprintf(
|
||||
"mkdir -p '%s' && (mountpoint -q '%s' || mount -t virtiofs %s '%s') && cd '%s' && %s",
|
||||
ws.GuestPath, ws.GuestPath, ws.MountTag, ws.GuestPath, ws.GuestPath, strings.Join(args, " "))
|
||||
|
||||
sshArgs := []string{
|
||||
"-p", "root",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue