Persist OpenBao secrets in /var/lib and make readiness non-blocking

This commit is contained in:
Joshua Bell 2026-01-27 17:28:39 -06:00
parent 8b54a94c54
commit 15fccd2ff4
6 changed files with 123 additions and 85 deletions

View file

@ -42,7 +42,7 @@
fragments = builtins.attrValues (builtins.mapAttrs ( fragments = builtins.attrValues (builtins.mapAttrs (
name: s: name: s:
let let
secretPath = s.path or ("/run/secrets/" + name); secretPath = s.path or ("/var/lib/openbao-secrets/" + name);
in in
substitute secretPath (s.configChanges or { }) substitute secretPath (s.configChanges or { })
) secrets); ) secrets);
@ -85,7 +85,7 @@
fragments = builtins.attrValues (builtins.mapAttrs ( fragments = builtins.attrValues (builtins.mapAttrs (
name: s: name: s:
let let
secretPath = s.path or ("/run/secrets/" + name); secretPath = s.path or ("/var/lib/openbao-secrets/" + name);
in in
substitute secretPath (s.hmChanges or { }) substitute secretPath (s.hmChanges or { })
) secrets); ) secrets);

View file

@ -318,6 +318,12 @@ in
options.ringofstorms.secretsBao = { options.ringofstorms.secretsBao = {
enable = lib.mkEnableOption "Fetch runtime secrets via OpenBao"; enable = lib.mkEnableOption "Fetch runtime secrets via OpenBao";
secretsBasePath = lib.mkOption {
type = lib.types.str;
default = "/var/lib/openbao-secrets";
description = "Base directory for rendered secrets. Use /var/lib/openbao-secrets for persistence across reboots, or /run/secrets for ephemeral.";
};
zitadelKeyPath = lib.mkOption { zitadelKeyPath = lib.mkOption {
type = lib.types.str; type = lib.types.str;
default = "/machine-key.json"; default = "/machine-key.json";
@ -404,7 +410,7 @@ in
options = { options = {
path = lib.mkOption { path = lib.mkOption {
type = lib.types.str; type = lib.types.str;
default = "/run/secrets/${name}"; default = "${cfg.secretsBasePath}/${name}";
}; };
owner = lib.mkOption { owner = lib.mkOption {
@ -488,14 +494,17 @@ in
sec sec
]; ];
# Persistent secrets directory - survives reboots
systemd.tmpfiles.rules = systemd.tmpfiles.rules =
[ [
"d /run/openbao 0700 root root - -" "d /run/openbao 0700 root root - -"
"f /run/openbao/zitadel.jwt 0400 root root - -" "f /run/openbao/zitadel.jwt 0400 root root - -"
"d /run/secrets 0711 root root - -" # Create base secrets directory (persistent by default)
"d ${cfg.secretsBasePath} 0711 root root - -"
] ]
# Create empty placeholder files for all secret destinations so # Create empty placeholder files for all secret destinations so
# services that reference env files don't fail when offline. # services that reference env files don't fail when offline.
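# Important: the 'f' type only creates a file that is missing; it never truncates an existing one (that would be 'f+'), so persisted content is kept.
# The two trailing '-' fields are simply the unset age and argument columns.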
++ (lib.unique ( ++ (lib.unique (
lib.concatLists ( lib.concatLists (
lib.mapAttrsToList ( lib.mapAttrsToList (
@ -508,6 +517,8 @@ in
) cfg.secrets ) cfg.secrets
) )
)) ))
# Only create a placeholder if the file doesn't exist yet (preserves persisted secrets).
# The 'f' type creates the file when missing and leaves existing content alone; the trailing '-' fields leave age and argument unset, so nothing is written into it.
++ (lib.mapAttrsToList ( ++ (lib.mapAttrsToList (
_: secret: _: secret:
"f ${secret.path} ${secret.mode} ${secret.owner} ${secret.group} - -" "f ${secret.path} ${secret.mode} ${secret.owner} ${secret.group} - -"
@ -547,7 +558,7 @@ in
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];
pathConfig = { pathConfig = {
PathChanged = "/run/secrets"; PathChanged = cfg.secretsBasePath;
Unit = "openbao-secrets-ready.service"; Unit = "openbao-secrets-ready.service";
TriggerLimitIntervalSec = 30; TriggerLimitIntervalSec = 30;
TriggerLimitBurst = 3; TriggerLimitBurst = 3;
@ -587,8 +598,9 @@ in
) )
{ {
openbao-secrets-ready = { openbao-secrets-ready = {
description = "OpenBao: all configured secrets present"; description = "OpenBao: all configured secrets present (informational)";
wantedBy = [ "multi-user.target" ]; # NOT in wantedBy - this is a passive check, not a startup requirement
# It gets triggered by path units when secrets change
wants = [ "vault-agent.service" ]; wants = [ "vault-agent.service" ];
after = [ "vault-agent.service" ]; after = [ "vault-agent.service" ];
@ -600,18 +612,25 @@ in
UMask = "0077"; UMask = "0077";
ExecStart = pkgs.writeShellScript "openbao-secrets-ready" '' ExecStart = pkgs.writeShellScript "openbao-secrets-ready" ''
#!/usr/bin/env bash #!/usr/bin/env bash
set -euo pipefail set -uo pipefail
missing=0
${lib.concatStringsSep "\n" ( ${lib.concatStringsSep "\n" (
lib.mapAttrsToList (name: secret: '' lib.mapAttrsToList (name: secret: ''
if [ ! -s ${lib.escapeShellArg secret.path} ]; then if [ ! -s ${lib.escapeShellArg secret.path} ]; then
echo "Missing secret: ${secret.path}" >&2 echo "Missing secret: ${secret.path}" >&2
exit 1 missing=1
fi fi
'') cfg.secrets '') cfg.secrets
)} )}
if [ "$missing" -eq 1 ]; then
echo "Some secrets are missing (may be stale or not yet fetched)" >&2
# Don't exit 1 - this is informational only
else
echo "All configured OpenBao secrets present." >&2 echo "All configured OpenBao secrets present." >&2
fi
exit 0
''; '';
}; };
}; };
@ -637,6 +656,9 @@ in
zitadel-mint-jwt = { zitadel-mint-jwt = {
description = "Mint Zitadel access token (JWT) for OpenBao"; description = "Mint Zitadel access token (JWT) for OpenBao";
# Non-blocking: don't add to wantedBy, let timer handle it
# The timer is wantedBy timers.target, so this runs periodically
startLimitIntervalSec = 0; startLimitIntervalSec = 0;
after = [ after = [
@ -656,17 +678,15 @@ in
Type = "oneshot"; Type = "oneshot";
User = "root"; User = "root";
Group = "root"; Group = "root";
Restart = "on-failure"; # Don't restart on failure - timer will retry later
RestartSec = "30s"; # This prevents blocking activation if Zitadel is down
TimeoutStartSec = "2min"; TimeoutStartSec = "2min";
UMask = "0077"; UMask = "0077";
ExecStart = pkgs.writeShellScript "zitadel-mint-jwt-service" '' ExecStart = pkgs.writeShellScript "zitadel-mint-jwt-service" ''
#!/usr/bin/env bash #!/usr/bin/env bash
set -euo pipefail set -uo pipefail
if [ ! -d "/run/openbao" ]; then if [ ! -d "/run/openbao" ]; then
${pkgs.coreutils}/bin/mkdir -p /run/openbao ${pkgs.coreutils}/bin/mkdir -p /run/openbao
@ -724,18 +744,16 @@ in
return 1 return 1
} }
# Check if existing token is still valid
if [ -s "${cfg.zitadelJwtPath}" ] && jwt_is_valid "$(cat "${cfg.zitadelJwtPath}")"; then if [ -s "${cfg.zitadelJwtPath}" ] && jwt_is_valid "$(cat "${cfg.zitadelJwtPath}")"; then
echo "zitadel-mint-jwt: existing token still valid; skipping" >&2 echo "zitadel-mint-jwt: existing token still valid; skipping" >&2
exit 0 exit 0
fi fi
jwt="$(${zitadelMintJwt}/bin/zitadel-mint-jwt)" # Try to mint a new token
jwt=""
if [ -z "$jwt" ] || [ "$jwt" = "null" ]; then if jwt="$(${zitadelMintJwt}/bin/zitadel-mint-jwt 2>&1)"; then
echo "Failed to mint Zitadel access token" >&2 if [ -n "$jwt" ] && [ "$jwt" != "null" ]; then
exit 1
fi
tmp="$(${pkgs.coreutils}/bin/mktemp)" tmp="$(${pkgs.coreutils}/bin/mktemp)"
trap '${pkgs.coreutils}/bin/rm -f "$tmp"' EXIT trap '${pkgs.coreutils}/bin/rm -f "$tmp"' EXIT
${pkgs.coreutils}/bin/printf '%s' "$jwt" > "$tmp" ${pkgs.coreutils}/bin/printf '%s' "$jwt" > "$tmp"
@ -748,6 +766,24 @@ in
# Update the token file (the agent watches it). # Update the token file (the agent watches it).
${pkgs.coreutils}/bin/cat "$tmp" > "${cfg.zitadelJwtPath}" ${pkgs.coreutils}/bin/cat "$tmp" > "${cfg.zitadelJwtPath}"
${pkgs.coreutils}/bin/chmod 0400 "${cfg.zitadelJwtPath}" || true ${pkgs.coreutils}/bin/chmod 0400 "${cfg.zitadelJwtPath}" || true
echo "zitadel-mint-jwt: successfully refreshed token" >&2
exit 0
fi
fi
# Failed to mint new token - check if we have a stale but existing token
if [ -s "${cfg.zitadelJwtPath}" ]; then
echo "zitadel-mint-jwt: failed to refresh, but existing token present (may be stale)" >&2
echo "zitadel-mint-jwt: continuing with stale token; will retry on next timer" >&2
# Exit 0 so we don't block activation - timer will retry
exit 0
fi
# No existing token and failed to mint - this is a problem but don't block
echo "zitadel-mint-jwt: failed to mint token and no existing token available" >&2
echo "zitadel-mint-jwt: services requiring secrets will wait; timer will retry" >&2
# Exit 0 to not block activation - the timer will keep retrying
exit 0
''; '';
}; };
}; };
@ -775,6 +811,8 @@ in
RestartSec = "10s"; RestartSec = "10s";
TimeoutStartSec = "30s"; TimeoutStartSec = "30s";
UMask = "0077"; UMask = "0077";
# Only start if JWT exists (path unit will trigger us when it appears)
ExecCondition = "${pkgs.coreutils}/bin/test -s ${cfg.zitadelJwtPath}";
ExecStart = "${pkgs.openbao}/bin/bao agent -log-level=${lib.escapeShellArg cfg.vaultAgentLogLevel} -config=${mkAgentConfig}"; ExecStart = "${pkgs.openbao}/bin/bao agent -log-level=${lib.escapeShellArg cfg.vaultAgentLogLevel} -config=${mkAgentConfig}";
}; };

View file

@ -90,7 +90,7 @@
owner = "root"; owner = "root";
group = "root"; group = "root";
mode = "0400"; mode = "0400";
path = "/run/secrets/litellm.env"; # Uses default: /var/lib/openbao-secrets/litellm-env
softDepend = [ "litellm" ]; softDepend = [ "litellm" ];
template = '' template = ''
{{- with secret "kv/data/machines/home/openrouter" -}} {{- with secret "kv/data/machines/home/openrouter" -}}

View file

@ -29,7 +29,7 @@ in
host = "0.0.0.0"; host = "0.0.0.0";
openFirewall = false; openFirewall = false;
package = pkgsLitellm.litellm; package = pkgsLitellm.litellm;
environmentFile = "/run/secrets/litellm.env"; environmentFile = "/var/lib/openbao-secrets/litellm-env";
environment = { environment = {
SCARF_NO_ANALYTICS = "True"; SCARF_NO_ANALYTICS = "True";
DO_NOT_TRACK = "True"; DO_NOT_TRACK = "True";

View file

@ -349,7 +349,7 @@
exit 0 exit 0
fi fi
secret="/run/secrets/atuin-key-josh" secret="/var/lib/openbao-secrets/atuin-key-josh"
if [ ! -s "$secret" ]; then if [ ! -s "$secret" ]; then
echo "Missing atuin secret at $secret" >&2 echo "Missing atuin secret at $secret" >&2
exit 1 exit 1

View file

@ -27,7 +27,7 @@
# bao secrets # bao secrets
"/run/openbao" "/run/openbao"
"/run/secrets" "/var/lib/openbao-secrets"
]; ];
files = [ files = [
"/machine-key.json" "/machine-key.json"