commit 25b1cca0e6d430f7395337ba979268403904f1e6 Author: Joshua Bell Date: Sun Jan 25 22:59:49 2026 -0600 initial plan commit' diff --git a/qemu_dev_vm.md b/qemu_dev_vm.md new file mode 100644 index 0000000..a86b455 --- /dev/null +++ b/qemu_dev_vm.md @@ -0,0 +1,349 @@ +# QVM - Lightweight QEMU Development VM Wrapper + +A standalone CLI tool for running commands in an isolated NixOS VM with persistent state and shared caches. + +--- + +## Motivation + +Complex per-project VM systems create too many qcow2 images (~7GB each). QVM provides a simpler approach: +- **One master image** shared across all projects +- **Persistent overlay** for VM state +- **Shared caches** for cargo, pnpm, etc. +- **Mount any directory** as workspace + +Primary use case: Running AI coding agents (opencode, etc.) in isolation to prevent host filesystem access while maintaining build cache performance. + +--- + +## Security Model + +**Full VM isolation** (not containers): +- Container escapes via kernel exploits are a known attack surface +- VM escapes are rare - hypervisor boundary is fundamentally stronger +- For long unattended AI sessions, VM isolation is the safer choice + +**9p mount restrictions**: +- Only explicitly mounted directories are accessible +- Uses `security_model=mapped-xattr` (no passthrough) +- Host filesystem outside mounts is invisible to VM + +--- + +## Architecture + +``` +HOST VM +──── ── + +~/.local/share/qvm/ + └── base.qcow2 (read-only base image, ~7GB) + +~/.local/state/qvm/ + ├── overlay.qcow2 (persistent VM state, CoW) + ├── vm.pid (QEMU process ID) + ├── ssh.port (forwarded SSH port) + ├── serial.log (console output) + └── workspaces.json (mounted workspace registry) + +~/.cache/qvm/ + ├── cargo-home/ ──9p──▶ /cache/cargo/ + ├── cargo-target/ ──9p──▶ /cache/target/ + ├── pnpm-store/ ──9p──▶ /cache/pnpm/ + └── sccache/ ──9p──▶ /cache/sccache/ + +$(pwd) ──9p──▶ /workspace/{hash}/ + +~/.config/qvm/ + └── flake/ (user's NixOS flake definition) + ├── 
flake.nix + └── flake.lock +``` + +### Multiple Workspaces + +When `qvm run` is called from different directories, each gets mounted simultaneously: +- `/workspace/abc123/` ← `/home/josh/projects/foo` +- `/workspace/def456/` ← `/home/josh/projects/bar` + +The hash is derived from the absolute path. Commands run with CWD set to their workspace. + +### Shared Caches + +Caches are mounted from host, so: +- Cargo dependencies shared across all projects +- pnpm store shared (content-addressable) +- sccache compilation cache shared +- Each project still uses its own Cargo.lock/package.json - different versions coexist + +--- + +## CLI Interface + +```bash +# Run a command in VM (mounts $PWD as workspace, CDs into it) +qvm run opencode +qvm run "cargo build --release" +qvm run bash # interactive shell + +# VM lifecycle +qvm start # start VM daemon if not running +qvm stop # graceful shutdown +qvm status # show VM state, SSH port, mounted workspaces + +# Maintenance +qvm rebuild # rebuild base image from flake +qvm reset # wipe overlay, start fresh (keeps base image) + +# Direct access +qvm ssh # SSH into VM +qvm ssh -c "command" # run command via SSH +``` + +### Behavior Details + +**`qvm run <command>`**: +1. If VM not running, start it (blocking until SSH ready) +2. Mount $PWD into VM if not already mounted (via 9p hotplug or pre-mount) +3. SSH in and execute: `cd /workspace/{hash} && <command>` +4. Stream stdout/stderr to terminal +5. Exit with command's exit code +6. VM stays running for next command + +**`qvm start`**: +1. Check if VM already running (via pid file) +2. If base.qcow2 missing, run `qvm rebuild` first +3. Create overlay.qcow2 if missing (backed by base.qcow2) +4. Launch QEMU with KVM, virtio, 9p mounts +5. Wait for SSH to become available +6. Print SSH port + +**`qvm stop`**: +1. Send ACPI shutdown to VM +2. Wait for graceful shutdown (timeout 30s) +3. If timeout, SIGKILL QEMU +4. Clean up pid file + +**`qvm rebuild`**: +1. Run `nix build` on ~/.config/qvm/flake +2. 
Copy result to ~/.local/share/qvm/base.qcow2 +3. If VM running, warn user to restart for changes + +**`qvm reset`**: +1. Stop VM if running +2. Delete overlay.qcow2 +3. Delete workspaces.json +4. Next start creates fresh overlay + +--- + +## Default NixOS Flake + +The tool ships with a default flake template. User can customize at `~/.config/qvm/flake/`. + +### Included by Default + +```nix +{ + # Base system + boot.kernelPackages = pkgs.linuxPackages_latest; + + # Shell + programs.zsh.enable = true; + users.users.dev = { + isNormalUser = true; + shell = pkgs.zsh; + extraGroups = [ "wheel" ]; + }; + + # Essential tools + environment.systemPackages = with pkgs; [ + git + vim + tmux + htop + curl + wget + jq + ripgrep + fd + + # Language tooling (user can remove/customize) + rustup + nodejs_22 + pnpm + python3 + go + ]; + + # SSH server + services.openssh = { + enable = true; + settings.PasswordAuthentication = false; + }; + + # 9p mounts (populated at runtime) + fileSystems."/cache/cargo" = { device = "cargo"; fsType = "9p"; options = [...]; }; + fileSystems."/cache/target" = { device = "target"; fsType = "9p"; options = [...]; }; + fileSystems."/cache/pnpm" = { device = "pnpm"; fsType = "9p"; options = [...]; }; + fileSystems."/cache/sccache" = { device = "sccache"; fsType = "9p"; options = [...]; }; + + # Environment + environment.variables = { + CARGO_HOME = "/cache/cargo"; + CARGO_TARGET_DIR = "/cache/target"; + PNPM_HOME = "/cache/pnpm"; + SCCACHE_DIR = "/cache/sccache"; + }; + + # Disk size + virtualisation.diskSize = 20480; # 20GB +} +``` + +### User Customization + +Users edit `~/.config/qvm/flake/flake.nix` to: +- Add/remove packages +- Change shell configuration +- Add custom NixOS modules +- Pin nixpkgs version +- Include their dotfiles + +After editing: `qvm rebuild` + +--- + +## QEMU Configuration + +```bash +qemu-system-x86_64 \ + -enable-kvm \ + -cpu host \ + -m "${MEMORY:-8G}" \ + -smp "${CPUS:-4}" \ + \ + # Disk (overlay backed by base) + -drive 
file=overlay.qcow2,format=qcow2,if=virtio \ + \ + # Network (user mode with SSH forward) + -netdev user,id=net0,hostfwd=tcp::${SSH_PORT}-:22 \ + -device virtio-net-pci,netdev=net0 \ + \ + # 9p shares for caches + -virtfs local,path=${CACHE_DIR}/cargo-home,mount_tag=cargo,security_model=mapped-xattr \ + -virtfs local,path=${CACHE_DIR}/cargo-target,mount_tag=target,security_model=mapped-xattr \ + -virtfs local,path=${CACHE_DIR}/pnpm-store,mount_tag=pnpm,security_model=mapped-xattr \ + -virtfs local,path=${CACHE_DIR}/sccache,mount_tag=sccache,security_model=mapped-xattr \ + \ + # 9p shares for workspaces (added dynamically or pre-mounted) + -virtfs local,path=/path/to/project,mount_tag=ws_abc123,security_model=mapped-xattr \ + \ + # Console + -serial file:serial.log \ + -monitor none \ + -nographic \ + \ + # Daemonize + -daemonize \ + -pidfile vm.pid +``` + +### Resource Allocation + +Default: 50% of RAM, 90% of CPUs (can be configured via env vars or config file later) + +--- + +## Implementation Plan + +### Phase 1: Core Scripts + +1. **`qvm` main script** - dispatcher for subcommands +2. **`qvm-start`** - launch QEMU with all mounts +3. **`qvm-stop`** - graceful shutdown +4. **`qvm-run`** - mount workspace + execute command via SSH +5. **`qvm-ssh`** - direct SSH access +6. **`qvm-status`** - show state + +### Phase 2: Image Management + +1. **`qvm-rebuild`** - build image from flake +2. **`qvm-reset`** - wipe overlay +3. **Default flake template** - copy to ~/.config/qvm/flake on first run + +### Phase 3: Polish + +1. **First-run experience** - auto-create dirs, copy default flake, build image +2. **Error handling** - clear messages for common failures +3. 
**README** - usage docs + +--- + +## File Structure (New Repo) + +``` +qvm/ +├── bin/ +│ ├── qvm # Main dispatcher +│ ├── qvm-start +│ ├── qvm-stop +│ ├── qvm-run +│ ├── qvm-ssh +│ ├── qvm-status +│ ├── qvm-rebuild +│ └── qvm-reset +├── lib/ +│ └── common.sh # Shared functions +├── flake/ +│ ├── flake.nix # Default NixOS flake template +│ └── flake.lock +├── flake.nix # Nix flake for installing qvm itself +├── README.md +└── LICENSE +``` + +--- + +## Edge Cases & Decisions + +| Scenario | Behavior | +|----------|----------| +| `qvm run` with no args | Error: "Usage: qvm run `<command>`" | +| `qvm run` while image missing | Auto-trigger `qvm rebuild` first | +| `qvm run` from same dir twice | Reuse existing mount, just run command | +| `qvm stop` when not running | No-op, exit 0 | +| `qvm rebuild` while VM running | Warn but proceed; user must restart VM | +| `qvm reset` while VM running | Stop VM first, then reset | +| SSH not ready after 60s | Error with troubleshooting hints | +| QEMU crashes | Detect via pid, clean up state files | + +--- + +## Not In Scope (Explicit Exclusions) + +- **Multi-VM**: Only one VM at a time +- **Per-project configs**: Single global config (use qai for project-specific VMs) +- **Windows/macOS**: Linux + KVM only +- **GUI/Desktop**: Headless only +- **Snapshots**: Just overlay reset, no checkpoint/restore +- **Resource limits**: Trust the VM, no cgroups on host + +--- + +## Dependencies + +- `qemu` (with KVM support) +- `nix` (for building images) +- `openssh` (ssh client) +- `jq` (for workspace registry) + +--- + +## Future Considerations (Not v1) + +- Config file for memory/CPU/ports +- Tab completion for zsh/bash +- Systemd user service for auto-start +- Health checks and auto-restart +- Workspace unmount command diff --git a/vm_base/flake.nix b/vm_base/flake.nix new file mode 100644 index 0000000..6127ca2 --- /dev/null +++ b/vm_base/flake.nix @@ -0,0 +1,210 @@ +{ + description = "Qai base NixOS VM image"; + + inputs = { + home-manager = { + 
url = "github:nix-community/home-manager"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + flake-utils = { + url = "github:numtide/flake-utils"; + }; + nixos-generators = { + url = "github:nix-community/nixos-generators"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + common.url = "git+https://git.joshuabell.xyz/ringofstorms/dotfiles?dir=flakes/common"; + opencode.url = "github:anomalyco/opencode?rev=ad4bdd9f0fb7670949b5c47917bb656247ac60ac"; + ros_neovim.url = "git+https://git.joshuabell.xyz/ringofstorms/nvim"; + }; + + outputs = + inputs@{ + self, + nixpkgs, # NOTE(review): no `nixpkgs` entry is declared under `inputs`; presumably resolved via the flake registry - consider declaring it explicitly + flake-utils, + nixos-generators, + ... + }: + let + baseModule = + { + config, + pkgs, + lib, + ... + }: + let + stateVersion = "26.05"; + in + { + imports = [ + inputs."home-manager".nixosModules.default + + inputs.ros_neovim.nixosModules.default + + inputs.common.nixosModules.essentials + inputs.common.nixosModules.git + inputs.common.nixosModules.zsh + inputs.common.nixosModules.tmux + + ( + { + ... + }: + { + home-manager = { + useUserPackages = true; + useGlobalPkgs = true; + backupFileExtension = "bak"; + + users.root = { + home.stateVersion = stateVersion; + programs.home-manager.enable = true; + }; + + sharedModules = [ + inputs.common.homeManagerModules.atuin + inputs.common.homeManagerModules.git + inputs.common.homeManagerModules.postgres_cli_options + inputs.common.homeManagerModules.starship + inputs.common.homeManagerModules.zoxide + inputs.common.homeManagerModules.zsh + inputs.common.homeManagerModules.tmux + inputs.common.homeManagerModules.direnv + ]; + }; + } + ) + ]; + + nixpkgs.config = { + allowUnfree = true; + allowUnfreePredicate = (_: true); + }; + + networking.hostName = "qai-base"; + + # SSH enabled for terminal access via WebSocket proxy. NOTE(review): root password login is allowed and the root password below is a fixed value - confirm port 22 is never exposed beyond the host. + services.openssh = { + enable = true; + settings.PasswordAuthentication = true; + settings.PermitRootLogin = "yes"; + }; + + users.users.root.password = "root"; + + # Avoid slow boots due to wait-online. 
+ systemd.network.wait-online.enable = false; + systemd.services.NetworkManager-wait-online.enable = false; + systemd.services.systemd-networkd-wait-online.enable = false; + + networking.firewall.allowedTCPPorts = [ + 22 + ]; + + # Needed so `nix develop` works inside the VM. + nix.settings.experimental-features = [ + "nix-command" + "flakes" + ]; + + # Host binary cache (QEMU user-net host is reachable at 10.0.2.2). + # Only effective at runtime, not during image build. + networking.hosts."10.0.2.2" = [ "lio" ]; + + # Note: These substituters are for runtime use. The build VM can't reach them. + nix.settings.substituters = lib.mkAfter [ "http://lio:5000" ]; + nix.settings.trusted-public-keys = lib.mkAfter [ + "lio:9jKQ2xJyZjD0AWFzMcLe5dg3s8vOJ3uffujbUkBg4ms=" + ]; + # Fallback timeout so nix doesn't hang if lio is unreachable + nix.settings.connect-timeout = 5; + + time.timeZone = "America/Chicago"; + + # Git 2.35+ blocks repos owned by different uid; 9p shares can trip this. + # The `*` wildcard disables that ownership check for every path, not only /workspace (task-1, task-2, etc.) + environment.etc."gitconfig".text = '' + [safe] + directory = * + ''; + + programs.zsh.enable = true; + users.users.root.shell = pkgs.zsh; + + environment.systemPackages = with pkgs; [ + zsh + git + htop + vim + inputs.opencode.packages.${pkgs.stdenv.hostPlatform.system}.default + ]; + + environment.shellAliases = { + "oc" = "all_proxy='' http_proxy='' https_proxy='' opencode"; + "occ" = "oc -c"; + }; + + # Default disk is too small for `nix develop` / direnv. 
+ virtualisation.diskSize = 20 * 1024; + + virtualisation.vmVariant = { + virtualisation = { + memorySize = 4096; + cores = 2; + graphics = false; + }; + + virtualisation.forwardPorts = [ + { + from = "host"; + host.port = 2221; + guest.port = 22; + } + ]; + }; + + system.stateVersion = stateVersion; + }; + + in + { + nixosModules.default = baseModule; + } + // flake-utils.lib.eachDefaultSystem ( + system: + let + pkgs = nixpkgs.legacyPackages.${system}; + + baseVm = nixpkgs.lib.nixosSystem { + inherit system; + modules = [ baseModule ]; + }; + in + { + nixosConfigurations.base = baseVm; # NOTE(review): inside eachDefaultSystem this is presumably exposed as nixosConfigurations.<system>.base, which standard flake tooling does not look up - confirm intent + + # Runnable VM (./result/bin/run-<system.name>-vm; system.name defaults to the hostName, "qai-base") + packages.vm = baseVm.config.system.build.vm; + + # Bootable qcow2 disk image (./result/nixos.qcow2) + packages.qcow2 = nixos-generators.nixosGenerate { + inherit system; + format = "qcow"; + modules = [ baseModule ]; + }; + + apps.default = { + type = "app"; + program = "${baseVm.config.system.build.vm}/bin/run-${baseVm.config.system.name}-vm"; + }; + + devShells.default = pkgs.mkShellNoCC { + QEMU_NET_OPTS = "hostfwd=tcp::2221-:22"; # NOTE(review): same host 2221 -> guest 22 rule as vmVariant.forwardPorts above; confirm QEMU does not receive it twice + }; + } + ); +}