#!/usr/bin/env bash
# install.sultix.ai — one-line installer / updater for the sultix
# controller. Idempotent: every run brings the host to the latest
# released version. First run does the bootstrap (admin password,
# master.key, device CA, optional postgres, optional caddy).
# Subsequent runs detect the existing install and offer an update.
#
# Curl-pipe-bash entry:
#
#   curl -fsSL https://install.sultix.ai | bash
#
# Or with flags for unattended runs:
#
#   curl -fsSL https://install.sultix.ai | bash -s -- \
#       --yes --backend=postgres --proxy=caddy \
#       --proxy-domain=admin.example.com --admin-password=auto
#
# Targets v1: Ubuntu / Debian (apt) on amd64 + arm64. macOS and
# others are stubbed via dispatch functions (os_install_docker,
# os_install_unit, …) — adding a platform = filling those stubs.

set -euo pipefail

# ── presentation ──────────────────────────────────────────────────────
# Color codes degrade to plain when not a tty. Used by say/ok/warn/fail
# and the interactive prompts. Output is single-line so a curl-pipe
# rerun produces a clean log.
# Palette used by say/ok/warn/fail, the banner and the prompts. Every
# variable starts out empty (plain output) and is switched to its ANSI
# escape only when stdout is a terminal, so piped/logged runs stay clean.
C_DIM=''
C_RESET=''
C_CYAN=''
C_GREEN=''
C_YELLOW=''
C_RED=''
C_TEAL=''
C_TEAL_DIM=''
C_BOLD=''
if [ -t 1 ]; then
    C_DIM='\033[2m'
    C_RESET='\033[0m'
    C_BOLD='\033[1m'
    C_CYAN='\033[1;36m'
    C_GREEN='\033[1;32m'
    C_YELLOW='\033[1;33m'
    C_RED='\033[1;31m'
    # 256-color teal accents for the banner; on a 16-color terminal the
    # wordmark just renders in an approximate cyan.
    C_TEAL='\033[38;5;43m'        # ≈ #00d7af, bright teal for the wordmark
    C_TEAL_DIM='\033[38;5;30m'    # ≈ #008787, deeper teal for accents
fi

# ── install manifest ──────────────────────────────────────────────────
# Tracks what install.sh actually placed on the host so uninstall.sh
# can avoid yanking system-wide things (docker, caddy) that were
# already there before we touched anything. The file is plain shell
# key=value, sourceable. Updated incrementally as each component
# completes. Read by uninstall.sh; ignored by everything else.
# Linux defaults for the install manifest (directory + file inside it).
MANIFEST_DIR=/etc/sultix
MANIFEST_FILE="${MANIFEST_DIR}/install-manifest"

# Default location of the controller binary. This is the Linux value
# (root-owned /usr/local/bin, written via sudo). On macOS
# manifest_relocate_for_os re-points it to ~/.local/bin so the install
# never needs a password — important for the `curl | bash` flow where
# stdin has no tty for sudo.
BIN_PATH=/usr/local/bin/sultix

# Re-points OS-dependent paths after detect_os runs. Bundled here so
# main() makes one call rather than scattering per-OS adjustments.
manifest_relocate_for_os() {
    # Reads the global OS (set by detect_os). Any OS other than darwin
    # keeps the defaults assigned at file level.
    [ "$OS" = "darwin" ] || return 0

    # macOS: manifest and binary both live under the user's home.
    local support_dir="$HOME/Library/Application Support/sultix"
    MANIFEST_DIR="$support_dir"
    MANIFEST_FILE="$support_dir/install-manifest"
    BIN_PATH="$HOME/.local/bin/sultix"
}

manifest_init() {
    # Ensure the manifest directory exists (mode 0755), then create an
    # empty 0644 manifest unless one is already there — an existing
    # manifest is never truncated.
    osudo install -d -m 0755 "$MANIFEST_DIR"
    [ -f "$MANIFEST_FILE" ] || osudo install -m 0644 /dev/null "$MANIFEST_FILE"
}

manifest_set() {
    # manifest_set <key> <value>
    # Drops any existing "<key>=" line from the manifest, then appends
    # "<key>=<value>", so repeated calls leave exactly one line per key.
    local key="$1" value="$2"

    # In-place flag differs by sed flavor: GNU (linux) takes a bare -i,
    # BSD (darwin) requires an explicit (empty) backup suffix.
    local -a sed_inplace=(-i)
    if [ "${OS:-}" = "darwin" ]; then
        sed_inplace=(-i '')
    fi

    # Deletion is best-effort: errors are silenced and ignored.
    osudo sed "${sed_inplace[@]}" "/^${key}=/d" "$MANIFEST_FILE" 2>/dev/null || true
    printf '%s=%s\n' "$key" "$value" | osudo tee -a "$MANIFEST_FILE" > /dev/null
}

# Single-line status helpers. Each joins its arguments with spaces and
# prefixes a colored glyph; the C_* variables sit in the printf format
# on purpose so their \033 escapes get interpreted.

# say: progress line on stdout.
say() {
    printf "${C_CYAN}▸${C_RESET} %s\n" "$*"
}

# ok: success line on stdout.
ok() {
    printf "${C_GREEN}✓${C_RESET} %s\n" "$*"
}

# warn: warning line on stderr; execution continues.
warn() {
    printf "${C_YELLOW}!${C_RESET} %s\n" "$*" >&2
}

# fail: error line on stderr, then exit the script with status 1.
fail() {
    printf "${C_RED}✗${C_RESET} %s\n" "$*" >&2
    exit 1
}

# hr: dim horizontal rule on stdout.
hr() {
    printf "${C_DIM}────────────────────────────────────────────────${C_RESET}\n"
}

# Linux installs run as root (sudo curl|sudo bash) and need sudo for
# state under /var/lib/sultix or /etc; macOS installs run in user
# context (LaunchAgent) and own everything under ~/Library. osudo
# transparently drops the sudo prefix on darwin so we can write
# state-mutation calls once and have them do the right thing on both.
osudo() {
    # osudo <command> [args...]
    # Runs the command directly on darwin and through sudo everywhere
    # else; the command's exit status is passed through unchanged.
    case "${OS:-}" in
        darwin) "$@" ;;
        *)      sudo "$@" ;;
    esac
}

# Wordmark + URL trio, teal-tinted. Printed once per script run as the
# very first piece of output so the user sees something while we hit
# R2 for `latest.json`. The wordmark is figlet font "slant" (`figlet
# -f slant SULTIX`) baked in literally — the heredoc is single-quoted
# so the backslashes inside the art aren't interpreted by printf.
#
# Optional arg: a tag printed after the subtitle (e.g. "v0.4.137"
# or "update available"). Banner shape stays the same across modes
# — only the tag changes — so reruns feel familiar.
print_banner() {
    # print_banner [tag]
    # Prints the wordmark, a subtitle (with "  ·  <tag>" appended when a
    # tag is given) and the install / remove / android hint lines.
    local tag="${1:-}"
    local subtitle="self-hosted AI agents"
    if [ -n "$tag" ]; then
        subtitle="${subtitle}  ·  ${tag}"
    fi

    # Leading blank line, then switch to teal for the wordmark. The
    # heredoc delimiter is quoted so the art's backslashes stay literal.
    printf "\n${C_TEAL}"
    cat <<'EOF'
       _____ __  ____  ___________  __
      / ___// / / / / /_  __/  _/ |/ /
      \__ \/ / / / /   / /  / / |   /
     ___/ / /_/ / /___/ / _/ / /   |
    /____/\____/_____/_/ /___//_/|_|
EOF
    # Reset color, subtitle line, blank separator.
    printf "${C_RESET}${C_TEAL_DIM}    %s${C_RESET}\n\n" "$subtitle"

    printf "    ${C_TEAL}install${C_RESET}    ${C_DIM}curl -fsSL${C_RESET} https://install.sultix.ai ${C_DIM}| sudo bash${C_RESET}\n"
    printf "    ${C_TEAL}remove${C_RESET}     ${C_DIM}curl -fsSL${C_RESET} https://uninstall.sultix.ai ${C_DIM}| bash${C_RESET}\n"
    printf "    ${C_TEAL}android${C_RESET}    https://get.sultix.ai/android   ${C_YELLOW}(beta)${C_RESET}\n"
    printf "\n"
}

# ── flags + interactive defaults ──────────────────────────────────────
# Defaults for every installer option. The flag parser below overrides
# them from the command line.
ASSUME_YES=0                 # set to 1 by --yes / -y
FORCE=0                      # set to 1 by --force
BACKEND=sqlite               # sqlite | postgres
PROXY=none                   # none | caddy
PROXY_DOMAIN=''
PROXY_CERT=''
PROXY_KEY=''
ADMIN_USER=admin
ADMIN_PASSWORD=''            # empty = auto-generate (interactive prompts)
MASTER_KEY_SOURCE=auto       # auto | path:/foo/master.key | env
DATA_DIR=''                  # filled in by os_default_data_dir
BIND=127.0.0.1:3000
CLOUDFLARE_API_TOKEN_VAL=''

# Consume the positional parameters one flag at a time. Value flags use
# the --name=value form only; the value is whatever follows the first
# '='. Anything unrecognized aborts via fail. Every argument is shifted
# away, so "$@" is empty once the loop finishes.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -y|--yes)
            ASSUME_YES=1
            ;;
        --force)
            FORCE=1
            ;;
        --backend=*)
            BACKEND="${1#--backend=}"
            ;;
        --proxy=*)
            PROXY="${1#--proxy=}"
            ;;
        --proxy-domain=*)
            PROXY_DOMAIN="${1#--proxy-domain=}"
            ;;
        --proxy-cert=*)
            PROXY_CERT="${1#--proxy-cert=}"
            ;;
        --proxy-key=*)
            PROXY_KEY="${1#--proxy-key=}"
            ;;
        --admin-user=*)
            ADMIN_USER="${1#--admin-user=}"
            ;;
        --admin-password=*)
            ADMIN_PASSWORD="${1#--admin-password=}"
            ;;
        --master-key=*)
            MASTER_KEY_SOURCE="${1#--master-key=}"
            ;;
        --data-dir=*)
            DATA_DIR="${1#--data-dir=}"
            ;;
        --bind=*)
            BIND="${1#--bind=}"
            ;;
        --cloudflare-api-token=*)
            CLOUDFLARE_API_TOKEN_VAL="${1#--cloudflare-api-token=}"
            ;;
        -h|--help)
            cat <<HELP
sultix installer — fresh install + idempotent updates.

Usage:
    curl -fsSL https://install.sultix.ai | bash
    curl -fsSL https://install.sultix.ai | bash -s -- [flags]

Flags (all optional; interactive prompts ask for unset values):
    --yes, -y               run unattended; never prompt
    --force                 reinstall even if version matches latest
    --backend=sqlite|postgres
                            sqlite (default) or pgvector-in-docker
    --proxy=none|caddy      reverse proxy (default: none)
    --proxy-domain=DOMAIN   public domain that points at this host
                            (caddy + Let's Encrypt)
    --proxy-cert=PATH       BYO TLS cert (caddy + your cert)
    --proxy-key=PATH        BYO TLS key  (paired with --proxy-cert)
    --admin-user=NAME       initial admin username (default: admin)
    --admin-password=auto|<literal>
                            auto = generate 16-char password and print it
    --master-key=auto|path:/path|env
                            auto = first install creates one;
                            path: = import existing master.key;
                            env  = read SULTIX_MASTER_KEY at runtime
    --data-dir=PATH         override default data dir
    --bind=HOST:PORT        admin UI bind address (default 127.0.0.1:3000)
    --cloudflare-api-token=TOKEN
                            for caddy DNS-01 ACME (NAT'd hosts)

Interactive walkthrough is the default when stdin is a tty.
    curl ... | bash         interactive (asks every choice)
    curl ... | bash -s -- --yes
                            unattended (uses defaults; --backend etc.
                            override individual choices)
HELP
            exit 0
            ;;
        *)
            fail "unknown flag: $1 (try --help)"
            ;;
    esac
    shift
done

# Detect "we have a tty so we can prompt." curl-pipe-bash typically
# runs without a tty on stdin, but stdout may still be a tty. We use
# /dev/tty for prompts so --yes can flip the flow without losing the
# ability to print colored output.
# Prompting is allowed only when --yes was NOT given AND /dev/tty can
# really be opened for reading. An existence test (`[ -e /dev/tty ]`)
# is not enough — the node exists even when this process cannot use it
# (e.g. ssh without -t). The open happens inside a subshell so a
# failure stays silent and our own stdin is left untouched.
INTERACTIVE=0
if [ "$ASSUME_YES" != "1" ] && ( exec 0</dev/tty ) 2>/dev/null; then
    INTERACTIVE=1
fi

confirm() {
    # confirm "Continue?"
    # Returns 0 for yes, 1 for no. Non-interactive runs always answer
    # yes. An empty reply (or a failed read) counts as yes; otherwise
    # only Y, y, YES or yes are accepted.
    local prompt="$1" reply
    [ "$INTERACTIVE" = "1" ] || return 0

    printf "%s [Y/n] " "$prompt" > /dev/tty
    if ! read -r reply < /dev/tty; then
        reply=""
    fi

    if [ -z "$reply" ]; then
        return 0
    fi
    case "$reply" in
        Y|y|YES|yes) return 0 ;;
    esac
    return 1
}

ask() {
    # ask "Prompt" "default-value"
    # Prints the chosen value on stdout. The prompt itself goes to
    # /dev/tty so callers can capture the answer with $(...). In
    # non-interactive mode, or on an empty reply, the default wins.
    local reply=""
    if [ "$INTERACTIVE" = "1" ]; then
        printf "%s [%s] " "$1" "$2" > /dev/tty
        read -r reply < /dev/tty || reply=""
    fi
    printf "%s\n" "${reply:-$2}"
}

ask_secret() {
    # ask_secret "Prompt"
    # Reads a secret from /dev/tty without echoing it and prints it on
    # stdout followed by a newline. No default: an empty entry is
    # returned as an empty line and the caller decides what it means.
    # In non-interactive mode nothing is read and nothing is printed.
    if [ "$INTERACTIVE" != "1" ]; then
        printf ""
        return
    fi
    local ans=""
    printf "%s: " "$1" > /dev/tty
    # `read -s` suppresses echo only for the duration of this one read,
    # instead of a separate `stty -echo` / `stty echo` pair that leaves
    # the terminal silent if the read is interrupted in between (and
    # aborts the whole script under `set -e` when stty itself fails).
    read -rs ans < /dev/tty || ans=""
    # The user's Enter was not echoed, so finish the prompt line here.
    printf "\n" > /dev/tty
    printf "%s\n" "$ans"
}

# ── OS detection + abstraction ────────────────────────────────────────
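# Each os_* function dispatches to a per-platform implementation. The
# linux_* and darwin_* families are implemented in this file; any other
# OS (including windows) falls through to the dispatcher's `*)` arm,
# which typically fails with a clear "not yet supported" message — so
# adding a platform means adding one arm to each os_* dispatcher.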
detect_os() {
    # Sets the globals OS, ARCH, DISTRO, DISTRO_VERSION and PKG from
    # uname and (on linux) /etc/os-release. Unknown kernels or CPU
    # architectures abort via fail.
    local kernel machine
    kernel="$(uname -s)"
    machine="$(uname -m)"

    case "$kernel" in
        Linux)
            OS="linux"
            ;;
        Darwin)
            OS="darwin"
            ;;
        MINGW*|MSYS*|CYGWIN*)
            OS="windows"
            ;;
        *)
            fail "unsupported OS: $kernel"
            ;;
    esac

    case "$machine" in
        x86_64|amd64)
            ARCH="amd64"
            ;;
        aarch64|arm64)
            ARCH="arm64"
            ;;
        *)
            fail "unsupported architecture: $machine"
            ;;
    esac

    # Without a readable os-release (or off linux) there is no distro
    # information: report the OS name and kernel release instead.
    if [ "$OS" != "linux" ] || [ ! -r /etc/os-release ]; then
        DISTRO="${OS}"
        DISTRO_VERSION="$(uname -r)"
        PKG="unknown"
        return 0
    fi

    # os-release is sourced into this shell, so its variables (ID,
    # VERSION_ID, ...) become globals as a side effect.
    # shellcheck disable=SC1091
    . /etc/os-release
    DISTRO="${ID:-unknown}"
    DISTRO_VERSION="${VERSION_ID:-unknown}"
    if [ "$DISTRO" = "ubuntu" ] || [ "$DISTRO" = "debian" ]; then
        PKG="apt"
    else
        PKG="unknown"
    fi
}

os_default_data_dir() {
    # Prints the default data root for the current OS.
    #
    # History (2026-04-29): the linux default moved from /var/lib/sultix
    # to /opt/sultix so config and data share a single root — one thing
    # to back up, one --purge target, a cleaner status printout.
    if [ "$OS" = "linux" ]; then
        printf '/opt/sultix\n'
    elif [ "$OS" = "darwin" ]; then
        printf '%s/Library/Application Support/sultix\n' "$HOME"
    else
        fail "os_default_data_dir: $OS not yet supported"
    fi
}

# os_config_path returns the canonical config.yaml location for this
# OS. Mirrors controller.configPath() in Go so the installer reads
# the same file the controller binary will open at boot.
os_config_path() {
    # Prints the config.yaml location for the current OS; any OS other
    # than linux or darwin aborts via fail.
    if [ "$OS" = "linux" ]; then
        printf '/etc/sultix/config.yaml\n'
    elif [ "$OS" = "darwin" ]; then
        printf '%s/Library/Application Support/sultix/config.yaml\n' "$HOME"
    else
        fail "os_config_path: $OS not yet supported"
    fi
}

# read_existing_data_dir extracts data_dir from an existing
# config.yaml. Empty if no config or no data_dir field. Used to
# detect "we have an install at path X" so re-runs can show the
# operator their current paths and offer migration.
read_existing_data_dir() {
    # Prints the value of the first `data_dir:` entry in the config
    # file named by os_config_path, with surrounding whitespace and one
    # layer of single or double quotes removed. Prints nothing (and
    # returns 0) when the config is missing, unreadable, or has no
    # data_dir field.
    local cfg
    cfg="$(os_config_path)"
    [ -r "$cfg" ] || return 0
    # The value is everything after the FIRST colon on the line. Using
    # ':' as the awk field separator would cut a path that itself
    # contains a colon down to its first segment.
    awk '/^[[:space:]]*data_dir[[:space:]]*:/ {
        val = $0
        sub(/^[^:]*:/, "", val)
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", val)
        gsub(/^"|"$|^'"'"'|'"'"'$/, "", val)
        print val
        exit
    }' "$cfg"
}

# print_paths_summary lists every path the controller cares about,
# resolved for the current OS + active config. Called on every
# script entry point — fresh, update, modify, current — so
# operators always see what's where without re-reading the YAML.
# Args:
#   $1  data_dir   (resolved — passed in so we don't re-read mid-run)
print_paths_summary() {
    # print_paths_summary <data_dir>
    # Prints a six-line overview: binary, config, data dir, sqlite db
    # file, service name and a command for following the logs.
    local data_dir="$1"
    local cfg service_label log_hint
    cfg="$(os_config_path)"

    case "$OS" in
        darwin)
            service_label="${DARWIN_LABEL:-com.sultix.controller}"
            log_hint="tail -f ${DARWIN_LOG:-$HOME/Library/Logs/sultix-controller.log}"
            ;;
        *)
            service_label="sultix-controller.service"
            log_hint="journalctl -u sultix-controller -f"
            ;;
    esac

    printf "  binary     %s\n" "$BIN_PATH"
    printf "  config     %s\n" "$cfg"
    printf "  data       %s\n" "$data_dir"
    printf "  db         %s   (sqlite)\n" "$data_dir/db/sultix.db"
    printf "  service    %s\n" "$service_label"
    printf "  logs       %s\n" "$log_hint"
}

os_install_docker() {
    # Forwards all arguments to the docker step for the current OS.
    if [ "$OS" = "linux" ]; then
        linux_install_docker "$@"
    elif [ "$OS" = "darwin" ]; then
        darwin_check_docker "$@"
    else
        fail "os_install_docker: $OS not yet supported"
    fi
}

os_install_unit() {
    # Forwards all arguments to the service-unit installer for the
    # current OS (systemd on linux, launchd on darwin).
    if [ "$OS" = "linux" ]; then
        linux_install_systemd "$@"
    elif [ "$OS" = "darwin" ]; then
        darwin_install_launchd "$@"
    else
        fail "os_install_unit: $OS not yet supported"
    fi
}

os_restart_service() {
    # Restarts the controller service: systemctl (via sudo) on linux,
    # darwin_restart on darwin.
    if [ "$OS" = "linux" ]; then
        sudo systemctl restart sultix-controller
    elif [ "$OS" = "darwin" ]; then
        darwin_restart
    else
        fail "os_restart_service: $OS not yet supported"
    fi
}

os_service_status() {
    # Silent probe. The exit status is that of `systemctl is-active`
    # (linux) or `launchctl print` (darwin); any other OS yields 1.
    if [ "$OS" = "linux" ]; then
        systemctl is-active --quiet sultix-controller
    elif [ "$OS" = "darwin" ]; then
        launchctl print "gui/$(id -u)/${DARWIN_LABEL}" >/dev/null 2>&1
    else
        return 1
    fi
}

# ── Linux implementations ─────────────────────────────────────────────
linux_install_docker() {
    # Makes sure docker is present on a linux host and records in the
    # install manifest whether this installer put it there (key
    # docker_installed_by_sultix), so uninstall can leave a docker that
    # was already on the host alone.
    if command -v docker >/dev/null 2>&1; then
        ok "docker already installed ($(docker --version 2>/dev/null | head -1))"
        # The installer is re-run for updates. If an earlier run
        # installed docker and recorded "true", docker is of course
        # present now — that must not flip the record to "false".
        if ! grep -qx 'docker_installed_by_sultix=true' "$MANIFEST_FILE" 2>/dev/null; then
            manifest_set docker_installed_by_sultix false
        fi
        return 0
    fi
    # Recorded before the install starts so that a partially completed
    # install is still attributed to us.
    manifest_set docker_installed_by_sultix true
    # Docker's official installer at https://get.docker.com handles
    # every distro Docker supports (Ubuntu, Debian, Fedora, RHEL,
    # CentOS, openSUSE, Raspberry Pi OS, …) — repo setup, gpg keys,
    # package install, and starting the daemon. Vastly less code
    # than rolling our own apt-keyring + sources.list.d dance, and
    # it's what Docker themselves recommend for first-time setups.
    #
    # Trust model: same as the install.sultix.ai entrypoint — we're
    # already running curl-pipe-bash, adding Docker's official
    # script doesn't change the user's exposure.
    say "installing docker (via get.docker.com — Docker's official installer)"
    # Unpredictable log name: a fixed path in world-writable /tmp can
    # be pre-created (or symlinked) by another local user.
    local log
    log="$(mktemp "${TMPDIR:-/tmp}/sultix-docker-install.XXXXXX")" \
        || fail "could not create a temporary log file"
    curl -fsSL https://get.docker.com | sudo sh > "$log" 2>&1 \
        || { tail -20 "$log" >&2; rm -f -- "$log"; fail "docker install failed (see above)"; }
    rm -f -- "$log"
    # The official installer enables + starts dockerd on systemd
    # systems, but be explicit so a hand-edited unit doesn't leave
    # us with a stopped daemon.
    sudo systemctl enable --now docker 2>/dev/null || true
    ok "docker installed ($(docker --version 2>/dev/null | head -1))"
}

linux_install_systemd() {
    # The controller's `sultix install` subcommand writes the
    # systemd unit + creates sultix-ctrl user + dirs. We pass the
    # chosen DATA_DIR through so the unit's WorkingDirectory +
    # SULTIX_DATA_DIR + the config.yaml's data_dir field all agree
    # on a single root. Idempotent — re-runs converge.
    #
    # The binary is invoked through $BIN_PATH — the same variable
    # fetch_binary installs to — rather than a second hard-coded copy
    # of the path, so the two can never drift apart.
    say "running 'sultix install --data-dir=$DATA_DIR'"
    sudo "$BIN_PATH" install --data-dir="$DATA_DIR"
    ok "systemd unit + service user ready"
}

# ── Darwin implementations ────────────────────────────────────────────
# macOS runs the controller as a per-user LaunchAgent (not a system
# LaunchDaemon) so it inherits the user's docker context — Docker
# Desktop puts the daemon socket under ~/.docker/run/docker.sock
# which a root daemon can't reach reliably. The trade-off: the
# controller stops when the user logs out. For a single-admin Mac
# that's the right call; multi-user setups would want a different
# topology and aren't on the v1 target list.
# LaunchAgent identity plus the per-user plist and log file derived
# from it; all live under the invoking user's ~/Library.
DARWIN_LABEL=com.sultix.controller
DARWIN_PLIST="${HOME}/Library/LaunchAgents/${DARWIN_LABEL}.plist"
DARWIN_LOG="${HOME}/Library/Logs/sultix-controller.log"

darwin_check_docker() {
    # macOS never auto-installs docker — there are too many flavors
    # (Docker Desktop, colima, OrbStack, Rancher Desktop,
    # podman-desktop). This only verifies that the CLI exists and the
    # daemon answers, aborting with pointers otherwise. Because we
    # never install it, the manifest always records "false".
    command -v docker >/dev/null 2>&1 || fail "docker not found. Install one of these and re-run:
    Docker Desktop:  https://www.docker.com/products/docker-desktop
    OrbStack:        https://orbstack.dev
    colima:          brew install colima && colima start"

    docker info >/dev/null 2>&1 \
        || fail "docker is installed but the daemon isn't reachable. Start Docker Desktop / colima / OrbStack, then re-run."

    ok "docker reachable ($(docker --version 2>/dev/null | head -1))"
    manifest_set docker_installed_by_sultix false
}

darwin_install_launchd() {
    # Sets up the per-user LaunchAgent directly, in three steps:
    #
    #   1. create the data dir, ~/Library/Logs and the plist's directory
    #   2. write the plist (binary = $BIN_PATH, data dir = $DATA_DIR,
    #      stdout/stderr both appended to $DARWIN_LOG)
    #   3. bootout any previous instance, bootstrap + enable the new one
    #
    # Everything is under the user's home, so nothing here uses sudo.
    local gui_domain service_target
    gui_domain="gui/$(id -u)"
    service_target="${gui_domain}/${DARWIN_LABEL}"

    say "preparing data + log directories"
    mkdir -p "$DATA_DIR" "$HOME/Library/Logs" "$(dirname "$DARWIN_PLIST")"

    say "writing LaunchAgent plist (${DARWIN_PLIST})"
    cat > "$DARWIN_PLIST" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>            <string>${DARWIN_LABEL}</string>
    <key>ProgramArguments</key>
    <array>
        <string>${BIN_PATH}</string>
        <string>controller</string>
    </array>
    <key>EnvironmentVariables</key>
    <dict>
        <key>SULTIX_DATA_DIR</key> <string>${DATA_DIR}</string>
        <key>HOME</key>            <string>${HOME}</string>
        <key>PATH</key>            <string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
    </dict>
    <key>WorkingDirectory</key>     <string>${DATA_DIR}</string>
    <key>StandardOutPath</key>      <string>${DARWIN_LOG}</string>
    <key>StandardErrorPath</key>    <string>${DARWIN_LOG}</string>
    <key>RunAtLoad</key>            <true/>
    <key>KeepAlive</key>
    <dict>
        <key>SuccessfulExit</key>   <false/>
        <key>Crashed</key>          <true/>
    </dict>
    <key>ThrottleInterval</key>     <integer>3</integer>
</dict>
</plist>
EOF

    say "loading LaunchAgent"
    # A service already booted under this label would refuse a fresh
    # bootstrap, so remove it first; on a first install there is
    # nothing to remove and the error is ignored.
    launchctl bootout "$service_target" 2>/dev/null || true
    if ! launchctl bootstrap "$gui_domain" "$DARWIN_PLIST"; then
        fail "launchctl bootstrap failed — see $DARWIN_LOG"
    fi
    launchctl enable "$service_target"
    ok "LaunchAgent loaded — controller running on http://127.0.0.1:3000"
    say "logs: tail -f $DARWIN_LOG"
}

darwin_restart() {
    # Restart = bootout (failure ignored, e.g. when nothing is loaded)
    # followed by a fresh bootstrap of the existing plist.
    local gui_domain
    gui_domain="gui/$(id -u)"
    launchctl bootout "${gui_domain}/${DARWIN_LABEL}" 2>/dev/null || true
    launchctl bootstrap "$gui_domain" "$DARWIN_PLIST"
}

# ── version + binary fetch ────────────────────────────────────────────
# Root of the release bucket, and the small JSON document under it that
# names the newest released version.
RELEASES_BASE=https://releases.sultix.ai
LATEST_JSON_URL="$RELEASES_BASE/latest.json"

current_version() {
    # Prints the last whitespace-separated field of `sultix --version`,
    # "unknown" if that pipeline fails, or an empty line when no sultix
    # binary can be found at all.
    #
    # $BIN_PATH is probed first by absolute path: on macOS it
    # (~/.local/bin/sultix) may not be on PATH yet, especially in a
    # curl-pipe-bash run that didn't start from a login shell.
    local bin=""
    if [ -x "$BIN_PATH" ]; then
        bin="$BIN_PATH"
    elif command -v sultix >/dev/null 2>&1; then
        bin="sultix"
    fi

    if [ -z "$bin" ]; then
        echo ""
        return
    fi
    "$bin" --version 2>/dev/null | awk '{print $NF}' || echo "unknown"
}

latest_version() {
    # Fetches latest.json (10 s cap) and prints the value of its first
    # "version" member. The release pipeline publishes it as:
    #   {"version":"0.4.133","released_at":"2026-04-26T..."}
    local member='"version"[[:space:]]*:[[:space:]]*"[^"]*"'
    curl -fsSL --max-time 10 "$LATEST_JSON_URL" \
        | grep -o "$member" \
        | head -n 1 \
        | sed -e 's/.*"\([^"]*\)"$/\1/'
}

fetch_agent_image() {
    # fetch_agent_image <version>
    # Downloads the per-arch gzipped agent image tarball, verifies it
    # against the release's SHA256SUMS, `docker load`s it and tags it
    # :latest. Idempotent: returns early when sultix-agent:<version>
    # is already present locally. Any failure aborts via fail.
    local v="$1"
    local tag="sultix-agent:$v"
    local key="sultix-agent-${ARCH}.tar.gz"
    local url="${RELEASES_BASE}/v${v}/${key}"
    local sums_url="${RELEASES_BASE}/v${v}/SHA256SUMS"

    if osudo docker image inspect "$tag" >/dev/null 2>&1; then
        ok "agent image $tag already loaded"
        return 0
    fi

    # Private, unpredictable scratch directory: fixed names in the
    # world-writable /tmp can be pre-created or symlinked by another
    # local user before we write to them.
    local tmp out sums
    tmp="$(mktemp -d "${TMPDIR:-/tmp}/sultix-agent.XXXXXX")" \
        || fail "could not create a temporary directory"
    out="$tmp/$key"
    sums="$tmp/SHA256SUMS"

    say "downloading $key (may take a minute — 200-400 MiB)"
    curl -fL --progress-bar -o "$out" "$url" \
        || { rm -rf -- "$tmp"; fail "download failed: $url"; }
    curl -fsSL -o "$sums" "$sums_url" \
        || { rm -rf -- "$tmp"; fail "sha256sums fetch failed"; }

    say "verifying sha256"
    local expected got
    # Exact filename match in one awk call (also accepts the "*name"
    # binary-mode form). A `grep | awk` pipeline would, under
    # `set -o pipefail`, abort the script on a missing entry before the
    # explanatory message below could be printed.
    expected="$(awk -v name="$key" \
        '$2 == name || $2 == ("*" name) { print $1; exit }' "$sums")"
    [ -n "$expected" ] \
        || { rm -rf -- "$tmp"; fail "no sha256 entry for $key in SHA256SUMS"; }
    # sha256sum is the usual tool on linux; fall back to shasum where
    # it is not available.
    if command -v sha256sum >/dev/null 2>&1; then
        got="$(sha256sum "$out" | awk '{print $1}')"
    else
        got="$(shasum -a 256 "$out" | awk '{print $1}')"
    fi
    [ "$expected" = "$got" ] \
        || { rm -rf -- "$tmp"; fail "sha256 mismatch on agent image"; }
    ok "sha256 verified"

    say "loading agent image into docker"
    gzip -dc "$out" | osudo docker load \
        || { rm -rf -- "$tmp"; fail "docker load failed for $key"; }
    rm -rf -- "$tmp"
    # Ensure :latest also points at this version, since the controller
    # spawns agent containers with the implicit :latest tag.
    osudo docker tag "$tag" "sultix-agent:latest"
    ok "agent image loaded as $tag (also tagged latest)"
}

fetch_binary() {
    # fetch_binary <version>
    # Downloads sultix-<os>-<arch> for the given release, verifies it
    # against the release's SHA256SUMS and installs it at $BIN_PATH
    # (through osudo, i.e. sudo on linux). Any failure aborts via fail.
    local v="$1"
    local asset="sultix-${OS}-${ARCH}"
    local url="${RELEASES_BASE}/v${v}/${asset}"
    local sums_url="${RELEASES_BASE}/v${v}/SHA256SUMS"

    # Private, unpredictable scratch directory: fixed names in the
    # world-writable /tmp can be pre-created or symlinked by another
    # local user — and this file ends up executed as root.
    local tmp out sums
    tmp="$(mktemp -d "${TMPDIR:-/tmp}/sultix-bin.XXXXXX")" \
        || fail "could not create a temporary directory"
    out="$tmp/sultix.new"
    sums="$tmp/SHA256SUMS"

    say "downloading ${asset} v${v}"
    curl -fL --progress-bar -o "$out" "$url" \
        || { rm -rf -- "$tmp"; fail "download failed: $url"; }
    curl -fsSL -o "$sums" "$sums_url" \
        || { rm -rf -- "$tmp"; fail "sha256sums download failed: $sums_url"; }

    say "verifying sha256"
    local expected got
    # Exact filename match in one awk call (also accepts the "*name"
    # binary-mode form). A `grep | awk` pipeline would, under
    # `set -o pipefail`, abort the script on a missing entry before the
    # explanatory message below could be printed.
    expected="$(awk -v name="$asset" \
        '$2 == name || $2 == ("*" name) { print $1; exit }' "$sums")"
    [ -n "$expected" ] \
        || { rm -rf -- "$tmp"; fail "no sha256 entry for ${asset} in SHA256SUMS"; }
    # sha256sum is the usual tool on linux; fall back to shasum where
    # it is not available.
    if command -v sha256sum >/dev/null 2>&1; then
        got="$(sha256sum "$out" | awk '{print $1}')"
    else
        got="$(shasum -a 256 "$out" | awk '{print $1}')"
    fi
    [ "$expected" = "$got" ] \
        || { rm -rf -- "$tmp"; fail "sha256 mismatch (expected=$expected got=$got)"; }
    ok "sha256 verified"

    chmod +x "$out"
    osudo install -d -m 0755 "$(dirname "$BIN_PATH")"
    osudo mv "$out" "$BIN_PATH"
    rm -rf -- "$tmp"
    ok "installed $BIN_PATH"
}

# ── bootstrap helpers ─────────────────────────────────────────────────
gen_password() {
    # Prints 16 random characters drawn from [A-Za-z0-9] (nothing
    # shell-special), sourced from /dev/urandom. No trailing newline.
    #
    # Why the subshell: `tr | head -c N` looks harmless, but head exits
    # once it has its bytes, tr then dies of SIGPIPE, and under
    # `set -euo pipefail` that non-zero pipeline status would end the
    # whole script — silently, mid-function. pipefail is therefore
    # switched off, but only inside this subshell.
    local charset='A-Za-z0-9'
    local length=16
    (
        set +o pipefail
        LC_ALL=C tr -dc "$charset" < /dev/urandom | head -c "$length"
    )
}

# ── argument validation ───────────────────────────────────────────────
validate_args() {
    # Cross-checks the parsed flags and aborts via fail on the first
    # violation. Reads BACKEND, PROXY, PROXY_DOMAIN, PROXY_CERT,
    # PROXY_KEY, INTERACTIVE and (when set) OS.
    if [ "$BACKEND" != "sqlite" ] && [ "$BACKEND" != "postgres" ]; then
        fail "--backend must be sqlite or postgres (got: $BACKEND)"
    fi
    if [ "$PROXY" != "none" ] && [ "$PROXY" != "caddy" ]; then
        fail "--proxy must be none or caddy (got: $PROXY)"
    fi

    # macOS is sqlite-only with no built-in proxy: the postgres
    # bring-up and the caddy setup are both written for linux (apt,
    # systemctl, /home/sultix-ctrl), and neither makes much sense on a
    # single-user Mac.
    if [ "${OS:-}" = "darwin" ]; then
        [ "$BACKEND" != "postgres" ] \
            || fail "macOS: --backend=postgres not supported yet (Linux only). Use sqlite or run postgres yourself + point at it via Settings."
        [ "$PROXY" != "caddy" ] \
            || fail "macOS: --proxy=caddy not supported yet (Linux only). Bind to localhost and front it with your own proxy."
    fi

    # Everything below only concerns the caddy proxy.
    [ "$PROXY" = "caddy" ] || return 0

    # BYO certificate and key must be given together.
    if [ -n "$PROXY_CERT" ] && [ -z "$PROXY_KEY" ]; then
        fail "--proxy-cert requires --proxy-key"
    fi
    if [ -n "$PROXY_KEY" ] && [ -z "$PROXY_CERT" ]; then
        fail "--proxy-key requires --proxy-cert"
    fi

    # With neither a domain nor a cert, an interactive run will prompt
    # later; an unattended run has no way to choose and must stop here.
    if [ -z "$PROXY_DOMAIN" ] && [ -z "$PROXY_CERT" ] && [ "$INTERACTIVE" != "1" ]; then
        fail "--proxy=caddy needs --proxy-domain (ACME) or --proxy-cert/--proxy-key (BYO)"
    fi
}

# ── postgres (linux) ──────────────────────────────────────────────────
# Brings up pgvector/pgvector:pg16 in a docker container bound to
# 127.0.0.1:5432, named volume sultix-postgres-data, random
# password persisted to /home/sultix-ctrl/.sultix-postgres.env.
# Idempotent: existing container/volume reused; existing creds file
# wins so reruns don't rotate the password under a running install.
# Writes db.json into the controller's data dir so OpenSmart picks
# postgres on next restart.
linux_setup_postgres() {
    # Switches the controller to the postgres backend. Takes no
    # arguments; reads the global DATA_DIR. Steps, in order:
    #   1. reuse or create the credentials file
    #   2. pull the image; reuse or create the volume and the container
    #   3. poll pg_isready (up to 60 tries, 1s apart)
    #   4. enable the `vector` extension
    #   5. write db.json under $DATA_DIR/db
    #   6. restart the controller and look for "backend=postgres" in
    #      its journal (a miss is only a warning)
    # Docker commands run as the sultix-ctrl user; file operations and
    # systemctl go through sudo.
    local env_file="/home/sultix-ctrl/.sultix-postgres.env"
    say "postgres: pgvector/pgvector:pg16 in docker"

    # An existing creds file always wins, so a re-run never rotates the
    # password.
    if sudo test -f "$env_file"; then
        ok "postgres: existing creds at $env_file (reusing)"
    else
        # Same SIGPIPE landmine as gen_password — tr piped into head
        # under pipefail kills the script silently. Subshell.
        local pass
        pass="$( ( set +o pipefail
                   LC_ALL=C tr -dc 'A-Za-z0-9' < /dev/urandom | head -c 48 ) )"
        sudo install -d -m 0700 -o sultix-ctrl -g sultix-ctrl /home/sultix-ctrl
        sudo tee "$env_file" >/dev/null <<EOF
PG_USER=sultix
PG_DB=sultix
PG_PASS=$pass
PG_HOST=127.0.0.1
PG_PORT=5432
PG_CONTAINER=sultix-postgres
PG_VOLUME=sultix-postgres-data
PG_IMAGE=pgvector/pgvector:pg16
EOF
        sudo chmod 600 "$env_file"
        sudo chown sultix-ctrl:sultix-ctrl "$env_file"
        ok "postgres: generated creds → $env_file"
    fi

    # Source via sudo so the file's mode 0600 doesn't block us
    # Only user, db, password and port are read back from the file; the
    # container, volume and image names used below are literals.
    local pg_user pg_db pg_pass pg_port
    pg_user=$(sudo grep '^PG_USER=' "$env_file" | cut -d= -f2)
    pg_db=$(sudo grep '^PG_DB=' "$env_file" | cut -d= -f2)
    pg_pass=$(sudo grep '^PG_PASS=' "$env_file" | cut -d= -f2)
    pg_port=$(sudo grep '^PG_PORT=' "$env_file" | cut -d= -f2)

    say "postgres: pulling image"
    sudo -u sultix-ctrl docker pull -q pgvector/pgvector:pg16

    if sudo -u sultix-ctrl docker volume inspect sultix-postgres-data >/dev/null 2>&1; then
        ok "postgres: volume sultix-postgres-data already exists"
    else
        sudo -u sultix-ctrl docker volume create sultix-postgres-data >/dev/null
        ok "postgres: volume sultix-postgres-data created"
    fi

    # `docker ps -a` also lists stopped containers; an existing one is
    # left untouched (it is neither started nor recreated here).
    if sudo -u sultix-ctrl docker ps -a --filter name='^sultix-postgres$' --format '{{.Names}}' \
            | grep -qx sultix-postgres; then
        ok "postgres: container sultix-postgres exists (leaving alone)"
    else
        # Published on loopback only: host 127.0.0.1:$pg_port maps to
        # container port 5432.
        sudo -u sultix-ctrl docker run -d \
            --name sultix-postgres \
            --restart unless-stopped \
            -p "127.0.0.1:${pg_port}:5432" \
            -v sultix-postgres-data:/var/lib/postgresql/data \
            -e POSTGRES_USER="$pg_user" \
            -e POSTGRES_DB="$pg_db" \
            -e POSTGRES_PASSWORD="$pg_pass" \
            pgvector/pgvector:pg16 >/dev/null
        ok "postgres: container started"
    fi

    say "postgres: waiting for pg_isready"
    # i counts failed probes; reaching 60 means the loop ran out
    # without ever seeing a ready server.
    local i=0
    while [ $i -lt 60 ]; do
        if sudo -u sultix-ctrl docker exec sultix-postgres \
                pg_isready -U "$pg_user" -d "$pg_db" >/dev/null 2>&1; then
            break
        fi
        i=$((i+1)); sleep 1
    done
    [ $i -lt 60 ] || fail "postgres: pg_isready timeout after 60s"
    ok "postgres: ready (after ${i}s)"

    sudo -u sultix-ctrl docker exec -e PGPASSWORD="$pg_pass" sultix-postgres \
        psql -U "$pg_user" -d "$pg_db" -c "CREATE EXTENSION IF NOT EXISTS vector;" >/dev/null
    ok "postgres: vector extension enabled"

    # Write db.json so OpenSmart picks postgres on next boot.
    # It embeds the password, hence dir 0700 / file 0600 owned by
    # sultix-ctrl.
    local db_dir="$DATA_DIR/db"
    sudo install -d -m 0700 -o sultix-ctrl -g sultix-ctrl "$db_dir"
    sudo tee "$db_dir/db.json" >/dev/null <<EOF
{"backend":"postgres","postgres_url":"postgres://${pg_user}:${pg_pass}@127.0.0.1:${pg_port}/${pg_db}?sslmode=disable"}
EOF
    sudo chmod 600 "$db_dir/db.json"
    sudo chown sultix-ctrl:sultix-ctrl "$db_dir/db.json"
    ok "postgres: db.json written → $db_dir/db.json"

    say "postgres: restarting controller to pick up backend swap"
    sudo systemctl restart sultix-controller
    # Short pause before grepping the last 30 journal lines for the
    # backend marker; not finding it is reported as a warning only.
    sleep 2
    if sudo journalctl -u sultix-controller -n 30 --no-pager 2>/dev/null \
            | grep -q "backend=postgres"; then
        ok "postgres: controller is now on postgres"
    else
        warn "postgres: controller didn't log backend=postgres yet — check 'journalctl -u sultix-controller'"
    fi
}

# ── caddy reverse proxy (linux) ───────────────────────────────────────
# Three modes:
#   - --proxy-domain=DOMAIN  → ACME HTTP-01 (or DNS-01 if NAT'd and
#     CLOUDFLARE_API_TOKEN is set)
#   - --proxy-cert + --proxy-key → BYO cert; caddy doesn't talk ACME
#   - (no domain, no cert) → interactive mode prompts; --yes errors
#     out earlier in validate_args
linux_setup_caddy() {
    # Installs caddy (if missing), writes our single site snippet and
    # makes sure the master Caddyfile imports it.
    #
    # Globals read: PROXY_CERT, PROXY_KEY (BYO mode), PROXY_DOMAIN,
    #               CLOUDFLARE_API_TOKEN_VAL (ACME mode), BIND (the
    #               port after the last ':' is the proxy upstream).
    # Side effects: apt repo + package install, files under /etc/caddy,
    #               manifest key caddy_installed_by_sultix, caddy
    #               enabled + reloaded via systemctl.

    # Record provenance before touching anything, so uninstall can
    # leave a caddy that was already on the host alone.
    if command -v caddy >/dev/null 2>&1; then
        manifest_set caddy_installed_by_sultix false
        ok "caddy: already installed ($(caddy version 2>&1 | head -1))"
    else
        manifest_set caddy_installed_by_sultix true
        say "caddy: installing (apt)"
        # Helper packages are best-effort (|| true): a failure here
        # must not abort the install.
        sudo apt-get install -y -qq debian-keyring debian-archive-keyring apt-transport-https \
            >/dev/null 2>&1 || true
        sudo rm -f /usr/share/keyrings/caddy-stable-archive-keyring.gpg
        curl -fsSL https://dl.cloudsmith.io/public/caddy/stable/gpg.key \
            | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
        curl -fsSL https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt \
            | sudo tee /etc/apt/sources.list.d/caddy-stable.list >/dev/null
        sudo apt-get update -qq
        sudo apt-get install -y -qq caddy
        ok "caddy: installed"
    fi

    # Caddyfile: we own a single sites-available snippet that we
    # import into the master Caddyfile. The user owns everything
    # else; uninstall touches only our file.
    sudo install -d -m 0755 /etc/caddy/sites-available
    local snippet=/etc/caddy/sites-available/sultix.caddyfile

    if [ -n "$PROXY_CERT" ]; then
        # BYO cert mode. The user-supplied PEMs are referenced in
        # place; we don't copy them so a key rotation by replacing
        # the source file just works after `systemctl reload caddy`.
        #
        # Caddy runs as its own daemon user — by default it can't
        # read mode-0600 files owned by root. Adjust ownership +
        # mode so the caddy user has read access without making the
        # files world-readable. Cert: mode 0644 (public anyway).
        # Key: chgrp caddy + mode 0640 (caddy can read; world can't).
        if id caddy >/dev/null 2>&1; then
            sudo chmod 0644 "$PROXY_CERT"
            sudo chgrp caddy "$PROXY_KEY"  2>&1 || warn "chgrp caddy on $PROXY_KEY failed"
            sudo chmod 0640 "$PROXY_KEY"
            ok "caddy: BYO cert/key permissions adjusted (cert 0644, key caddy:caddy 0640)"
        fi
        sudo tee "$snippet" >/dev/null <<EOF
# generated by install.sultix.ai — sultix admin reverse proxy
# BYO TLS cert mode: serves on the address caddy listens at by
# default (whichever site_addresses you've set globally).
:443 {
    tls $PROXY_CERT $PROXY_KEY
    reverse_proxy localhost:${BIND##*:}
}
EOF
        ok "caddy: BYO cert config written ($snippet)"
    else
        # ACME mode. Caddy auto-issues from Let's Encrypt; the
        # cloudflare token (if set) goes in via the dns01 module
        # for the tls config. Without DNS-01 module support
        # bundled into the standard caddy package, DNS-01 needs
        # `caddy-dns/cloudflare` plugin — we print a warning so
        # the operator knows to install the plugin if reload fails.
        if [ -n "$CLOUDFLARE_API_TOKEN_VAL" ]; then
            # This snippet embeds the Cloudflare API token, so it
            # must not be world-readable. Pre-create it root:caddy
            # 0640 *before* writing: tee truncates an existing file
            # but keeps its mode, so the secret never sits on disk
            # with the default 0644.
            if getent group caddy >/dev/null 2>&1; then
                sudo install -m 0640 -o root -g caddy /dev/null "$snippet"
            else
                warn "caddy: no 'caddy' group on this host — $snippet will hold the Cloudflare token with default permissions; restrict it manually"
            fi
            sudo tee "$snippet" >/dev/null <<EOF
# generated by install.sultix.ai — sultix admin reverse proxy
# ACME via DNS-01 (Cloudflare). Requires the caddy-dns/cloudflare
# plugin; falls back to HTTP-01 if the plugin is missing.
$PROXY_DOMAIN {
    tls {
        dns cloudflare $CLOUDFLARE_API_TOKEN_VAL
    }
    reverse_proxy localhost:${BIND##*:}
}
EOF
            warn "caddy: DNS-01 config written. If Caddy reload fails with"
            warn "       'unknown directive: dns cloudflare', install the"
            warn "       cloudflare plugin: https://caddyserver.com/docs/build"
        else
            sudo tee "$snippet" >/dev/null <<EOF
# generated by install.sultix.ai — sultix admin reverse proxy
# ACME via HTTP-01. Caddy retries indefinitely if the challenge
# fails, so a NAT'd host stays online (just without TLS) until
# the operator either forwards :80 or switches to DNS-01.
$PROXY_DOMAIN {
    reverse_proxy localhost:${BIND##*:}
}
EOF
        fi
        ok "caddy: ACME config written for $PROXY_DOMAIN ($snippet)"
    fi

    # Master Caddyfile: ensure it imports sites-available/*. The grep
    # guard keeps reruns from appending the import line twice.
    if ! sudo grep -q 'import sites-available' /etc/caddy/Caddyfile 2>/dev/null; then
        sudo tee -a /etc/caddy/Caddyfile >/dev/null <<EOF

# Added by install.sultix.ai — pull in per-site snippets.
import sites-available/*
EOF
        ok "caddy: imported sites-available/* in /etc/caddy/Caddyfile"
    fi

    sudo systemctl enable --now caddy
    sudo systemctl reload caddy
    ok "caddy: running + reloaded"
}

# ── admin password bootstrap ──────────────────────────────────────────
# Calls `sultix admin reset-password <pw>` once the controller is
# up. Either:
#   --admin-password=<literal>  set to that value
#   --admin-password=auto       generate a 16-char password
#   (interactive; no flag)      prompt
#   ASSUME_YES + no flag        leave at default sultix00 + warn
bootstrap_admin_password() {
    # Resolves the password to apply (flag value, generated, or
    # prompted), pushes it through the admin CLI, and leaves the
    # string to show in the final summary in ADMIN_PASSWORD_DISPLAY.
    local chosen="$ADMIN_PASSWORD"
    local first second

    case "$chosen" in
        auto)
            chosen="$(gen_password)"
            ;;
        "")
            # Nothing supplied: only an interactive session can ask.
            if [ "$INTERACTIVE" = "1" ]; then
                printf "\n${C_CYAN}Initial admin password${C_RESET} (leave blank to auto-generate)\n" > /dev/tty
                first="$(ask_secret "  password")"
                if [ -n "$first" ]; then
                    second="$(ask_secret "  confirm ")"
                    [ "$first" = "$second" ] || fail "passwords don't match"
                    chosen="$first"
                else
                    chosen="$(gen_password)"
                    ok "auto-generated admin password (will print at end)"
                fi
            fi
            ;;
    esac

    # Still empty → unattended run without --admin-password. Keep the
    # controller's default, warn loudly, and let the summary carry
    # the fix-it hint.
    if [ -z "$chosen" ]; then
        warn "no admin password set — controller booted with the default"
        warn "  username: admin   password: sultix00"
        warn "Set immediately:  sudo sultix admin reset-password"
        ADMIN_PASSWORD_DISPLAY="sultix00 (DEFAULT — change immediately)"
        return 0
    fi

    say "setting admin password (sultix admin reset-password)"
    # The CLI locates the data dir through SULTIX_DATA_DIR. It is
    # passed via `env VAR=value cmd …` because osudo expands "$@":
    # a leading VAR=value word would no longer be treated by bash as
    # an assignment prefix and would be run as a command instead.
    if ! osudo env SULTIX_DATA_DIR="$DATA_DIR" "$BIN_PATH" admin reset-password "$chosen" \
            >/dev/null 2>&1; then
        warn "admin password reset failed — falling back to default sultix00"
        ADMIN_PASSWORD_DISPLAY="sultix00 (DEFAULT — reset failed; try 'sudo sultix admin reset-password')"
        return 0
    fi

    ok "admin password set"
    ADMIN_PASSWORD_DISPLAY="$chosen"
}

# ── interactive prompts ───────────────────────────────────────────────
# Each *_prompt function reads from /dev/tty when INTERACTIVE=1; in
# unattended mode (--yes / no tty) they're a no-op and existing flag
# values stay. Fresh install calls each once; modify-mode picks
# specific ones based on what the user chose.

prompt_backend() {
    # Interactive chooser for the storage backend; result lands in
    # BACKEND. Unattended runs keep whatever the flags set.
    if [ "$INTERACTIVE" != "1" ]; then
        return
    fi

    {
        printf "\n${C_CYAN}Database backend${C_RESET}\n"
        printf "  [1] SQLite — file-based, simplest, default for single-host\n"
        printf "  [2] Postgres — pgvector container, better for many agents / search\n"
    } > /dev/tty

    local reply
    reply="$(ask "Choose" "1")"
    case "$reply" in
        2 | postgres | pg)
            BACKEND="postgres"
            ;;
        "" | 1 | sqlite)
            BACKEND="sqlite"
            ;;
        *)
            fail "unknown backend choice: $reply"
            ;;
    esac
}

prompt_proxy() {
    # Interactive chooser for the reverse-proxy / TLS mode.
    #
    # Globals written: PROXY ("none" | "caddy"), and depending on the
    #   choice PROXY_DOMAIN + CLOUDFLARE_API_TOKEN_VAL (ACME) or
    #   PROXY_CERT + PROXY_KEY (bring-your-own cert).
    # Returns: 0 on every accepted choice; invalid input goes
    #   through fail. No-op when INTERACTIVE is not "1".
    [ "$INTERACTIVE" != "1" ] && return
    printf "\n${C_CYAN}Reverse proxy / TLS${C_RESET}\n" > /dev/tty
    printf "  [1] None — admin UI on http://127.0.0.1:3000 (LAN-only or behind your own proxy)\n" > /dev/tty
    printf "  [2] Caddy + Let's Encrypt — public domain, automatic HTTPS via ACME\n" > /dev/tty
    printf "  [3] Caddy + bring-your-own cert — corporate CA / mkcert / self-signed\n" > /dev/tty
    local ans
    ans="$(ask "Choose" "1")"
    case "$ans" in
        1|none|"")
            PROXY="none"
            ;;
        2|caddy|acme|le)
            PROXY="caddy"
            PROXY_DOMAIN="$(ask "  Public domain that points at this host" "${PROXY_DOMAIN:-}")"
            [ -n "$PROXY_DOMAIN" ] || fail "domain required for ACME"
            printf "\n  ⓘ HTTP-01 needs DNS → this host's public IP + inbound :80.\n" > /dev/tty
            printf "    Behind NAT, set a Cloudflare API token (DNS-01 path).\n" > /dev/tty
            local tok
            tok="$(ask "  Cloudflare API token (optional, blank = HTTP-01)" "")"
            # Must be an `if`, not `[ -n "$tok" ] && …`: as the last
            # command of this arm, a blank token would make the whole
            # function return 1. The caller invokes us as the final
            # command of an `&&` list under `set -e`, so that status
            # silently killed the installer on the HTTP-01 path.
            if [ -n "$tok" ]; then
                CLOUDFLARE_API_TOKEN_VAL="$tok"
            fi
            ;;
        3|byo|cert)
            PROXY="caddy"
            PROXY_CERT="$(ask "  TLS cert path (fullchain.pem)" "${PROXY_CERT:-}")"
            PROXY_KEY="$(ask "  TLS key path (privkey.pem)"   "${PROXY_KEY:-}")"
            if [ -z "$PROXY_CERT" ] || [ -z "$PROXY_KEY" ]; then
                fail "cert + key required for BYO"
            fi
            ;;
        *) fail "unknown proxy choice: $ans" ;;
    esac
    return 0
}

prompt_admin_password() {
    # Result is delivered through $ADMIN_PASSWORD. A value that is
    # already present (literal or "auto", e.g. from --admin-password)
    # wins and no prompt is shown.
    local first again

    if [ -n "$ADMIN_PASSWORD" ]; then
        return
    fi

    # Nobody to ask in unattended mode: request a generated password.
    if [ "$INTERACTIVE" != "1" ]; then
        ADMIN_PASSWORD="auto"
        return
    fi

    printf "\n${C_CYAN}Initial admin password${C_RESET} (leave blank to auto-generate)\n" > /dev/tty
    first="$(ask_secret "  password")"
    if [ -n "$first" ]; then
        again="$(ask_secret "  confirm ")"
        if [ "$first" != "$again" ]; then
            fail "passwords don't match"
        fi
        ADMIN_PASSWORD="$first"
    else
        # Blank entry means "generate one for me".
        ADMIN_PASSWORD="auto"
    fi
}

# migrate_data_dir moves the entire data tree (agents, db, master.key,
# keys, devca) from one path to another while the controller is
# stopped, then rewrites the systemd unit + config so the new path
# is the canonical one going forward.
#
# Strict: bails on the first error rather than half-moving and
# leaving the operator with two partial trees. The service is
# restarted only after every step succeeds. Idempotent for the
# trivial case (old == new): just returns.
migrate_data_dir() {
    # Arguments: $1 - current data dir, $2 - destination data dir.
    # Globals:   OS, DARWIN_LABEL (read); DATA_DIR (set to $2 once
    #            the tree has been moved).
    # Exits via fail on any precondition or copy error.
    local old="$1" new="$2"
    if [ -z "$old" ] || [ -z "$new" ]; then
        fail "migrate_data_dir: old + new required"
    fi
    if [ "$old" = "$new" ]; then
        say "data dir already at $new"
        return 0
    fi
    if ! sudo test -d "$old"; then
        fail "old data dir not found: $old"
    fi
    if sudo test -e "$new" && sudo test -n "$(sudo ls -A "$new" 2>/dev/null)"; then
        fail "destination $new is non-empty — refusing to overlay (move it aside or pick another path)"
    fi

    say "migrating data: $old → $new"

    # Stop the service so nothing rewrites files mid-mv. Best-effort
    # because on a partial install the unit may not exist yet.
    if [ "$OS" = "linux" ]; then
        sudo systemctl stop sultix-controller.service 2>/dev/null || true
    elif [ "$OS" = "darwin" ]; then
        launchctl bootout "gui/$(id -u)/${DARWIN_LABEL}" 2>/dev/null || true
    fi

    sudo mkdir -p "$(dirname "$new")"

    # The guard above lets an existing-but-EMPTY destination through.
    # A plain `mv old new` would then nest the tree at
    # new/<basename old> and the unit would be pointed at an empty
    # directory. Remove the empty dir first so mv is a true rename;
    # rmdir is used instead of GNU-only `mv -T` so this also works
    # with the macOS mv. If the directory can't be removed (e.g. it
    # is a mount point) skip mv and copy into it with rsync instead.
    if sudo test -d "$new"; then
        sudo rmdir "$new" 2>/dev/null || true
    fi

    # Rename when possible; rsync+rm when crossing mounts.
    if sudo test -e "$new" || ! sudo mv "$old" "$new" 2>/dev/null; then
        say "cross-filesystem move; using rsync"
        sudo mkdir -p "$new"
        if ! sudo rsync -aHAX --remove-source-files "$old/" "$new/"; then
            fail "rsync failed; old data still at $old"
        fi
        # --remove-source-files leaves the (now empty) directory
        # skeleton behind; prune it.
        sudo find "$old" -type d -empty -delete
    fi

    # Re-apply ownership. The systemd unit runs as sultix-ctrl, so
    # the entire data tree must belong to that user. -R catches any
    # files whose ownership drifted on a cross-filesystem rsync.
    if [ "$OS" = "linux" ]; then
        sudo chown -R sultix-ctrl:sultix-ctrl "$new"
        sudo chmod 0700 "$new"
    fi

    # The data now lives at $new. Publish that before re-registering
    # the service so any helper that reads the DATA_DIR global sees
    # the new path. NOTE(review): darwin_install_launchd presumably
    # stamps DATA_DIR into the plist — confirm against its definition.
    DATA_DIR="$new"

    # Update config.yaml + unit. `sultix install --data-dir=NEW`
    # rewrites both idempotently (and re-stamps the unit's
    # SULTIX_DATA_DIR).
    if [ "$OS" = "linux" ]; then
        sudo /usr/local/bin/sultix install --data-dir="$new"
    fi

    # Restart. `sultix install` already does this on Linux, so this
    # is a no-op there but matters on macOS.
    if [ "$OS" = "darwin" ]; then
        darwin_install_launchd
    fi

    ok "data migrated to $new"
}

# ── modify mode (rerun on a current-version install) ──────────────────
# Shows the current config + a menu of add-ons. User picks one, we
# run JUST that piece. Loop until they say done. Postgres bring-up
# just installs the container; the admin uses Settings → Database
# to actually flip + migrate the data layer.
modify_mode() {
    # Arguments: $1 - installed version (display only).
    # Globals:   reads OS and DATA_DIR; sets BACKEND, PROXY,
    #            PROXY_DOMAIN, PROXY_CERT, PROXY_KEY,
    #            CLOUDFLARE_API_TOKEN_VAL and ADMIN_PASSWORD as inputs
    #            for the setup helpers it dispatches to.
    # Loops on the menu until the operator quits.
    local cur="$1"
    local choice tok new_dir

    # Read current state from the host. db.json lives under the
    # active data dir; our caddy snippet marks the proxy as set up.
    local cur_backend="sqlite"
    local cur_proxy="none"
    if sudo test -f "$DATA_DIR/db/db.json"; then
        if sudo grep -q '"backend":"postgres"' "$DATA_DIR/db/db.json" 2>/dev/null; then
            cur_backend="postgres"
        fi
    fi
    if sudo test -f /etc/caddy/sites-available/sultix.caddyfile; then
        cur_proxy="caddy"
    fi

    while true; do
        hr
        printf "${C_CYAN}sultix v%s — modify install${C_RESET}\n" "$cur"
        printf "  current backend  %s\n" "$cur_backend"
        printf "  current proxy    %s\n" "$cur_proxy"
        printf "\n"
        printf "  [1] Add Postgres (install pgvector container; migrate via Settings → Database)\n"
        printf "  [2] Add Caddy + Let's Encrypt (public domain)\n"
        printf "  [3] Add Caddy + bring-your-own cert\n"
        printf "  [4] Reset admin password\n"
        printf "  [5] Restart controller\n"
        printf "  [6] Move data dir to a different path\n"
        printf "  [q] Quit (no further changes)\n"
        hr
        choice="$(ask "Choose" "q")"
        case "$choice" in
            1)
                if [ "$OS" = "darwin" ]; then
                    warn "Postgres bring-up is Linux-only for now. Run pgvector yourself (brew/docker) and point at it via Settings → Database."
                elif [ "$cur_backend" = "postgres" ]; then
                    warn "Postgres is already installed."
                else
                    BACKEND="postgres"
                    linux_setup_postgres
                    cur_backend="postgres"
                    say "Postgres container is up. Open Settings → Database in the UI to migrate your sqlite data over."
                fi
                ;;
            2)
                if [ "$OS" = "darwin" ]; then
                    warn "Caddy bring-up is Linux-only for now (uses apt + systemctl). Front the controller with your own proxy."
                elif [ "$cur_proxy" = "caddy" ]; then
                    warn "Caddy is already configured. Edit /etc/caddy/sites-available/sultix.caddyfile to change settings."
                else
                    PROXY="caddy"
                    # linux_setup_caddy takes its BYO-cert branch
                    # whenever PROXY_CERT is non-empty. Clear any
                    # cert/key left over from --proxy-cert flags or
                    # an abandoned option [3], otherwise this ACME
                    # choice would silently write a BYO config.
                    PROXY_CERT=""
                    PROXY_KEY=""
                    PROXY_DOMAIN="$(ask "Public domain" "")"
                    [ -n "$PROXY_DOMAIN" ] || { warn "domain required; skipping"; continue; }
                    tok="$(ask "Cloudflare API token (optional, blank = HTTP-01)" "")"
                    if [ -n "$tok" ]; then
                        CLOUDFLARE_API_TOKEN_VAL="$tok"
                    fi
                    linux_setup_caddy
                    cur_proxy="caddy"
                fi
                ;;
            3)
                if [ "$OS" = "darwin" ]; then
                    warn "Caddy bring-up is Linux-only for now. Front the controller with your own proxy."
                elif [ "$cur_proxy" = "caddy" ]; then
                    warn "Caddy is already configured."
                else
                    PROXY="caddy"
                    PROXY_CERT="$(ask "TLS cert path" "")"
                    PROXY_KEY="$(ask "TLS key path" "")"
                    [ -n "$PROXY_CERT" ] && [ -n "$PROXY_KEY" ] || { warn "cert + key required; skipping"; continue; }
                    linux_setup_caddy
                    cur_proxy="caddy"
                fi
                ;;
            4)
                ADMIN_PASSWORD=""   # force prompt
                prompt_admin_password
                bootstrap_admin_password
                if [ -n "${ADMIN_PASSWORD_DISPLAY:-}" ]; then
                    printf "  new admin password: %s\n" "$ADMIN_PASSWORD_DISPLAY"
                fi
                ;;
            5)
                say "Restarting controller…"
                os_restart_service
                ok "restarted"
                ;;
            6)
                new_dir="$(ask "New data dir" "$DATA_DIR")"
                if [ -z "$new_dir" ] || [ "$new_dir" = "$DATA_DIR" ]; then
                    warn "no change"
                else
                    confirm_destructive "Stop the controller, move all data from $DATA_DIR to $new_dir, restart. The old path will be empty afterwards." "MOVE" \
                        || { say "aborted"; continue; }
                    # migrate_data_dir updates DATA_DIR on success,
                    # so the summary below shows the new location.
                    migrate_data_dir "$DATA_DIR" "$new_dir"
                    print_paths_summary "$DATA_DIR"
                fi
                ;;
            q|Q|""|quit|exit)
                ok "no changes."
                return
                ;;
            *)
                warn "Unknown choice: $choice"
                ;;
        esac
    done
}

# ── flow ──────────────────────────────────────────────────────────────
# ── update path: confirm → swap binary → restart, nothing else ────────
# Reused by main() when an existing install needs a version bump.
# Deliberately bare: no docker re-probe, no admin-password reset, no
# postgres/caddy bring-up, no manifest churn beyond the version
# bump. The user wanted this to be a one-prompt operation.
update_only() {
    # Arguments: $1 - installed version, $2 - latest released version.
    # Confirms with the operator, swaps in the new binary + agent
    # image, restarts the service and bumps the manifest. Exits 0 if
    # the operator declines.
    local cur="$1" lat="$2"
    hr
    printf "${C_CYAN}sultix update available${C_RESET}\n"
    printf "  installed   v%s\n" "$cur"
    printf "  latest      v%s\n" "$lat"
    hr
    confirm "Update now?" || { say "aborted"; exit 0; }

    fetch_binary "$lat"
    fetch_agent_image "$lat"

    # Just restart the running service; it picks up the new binary
    # (PID 1 swap inside the unit, same path as `sultix install`'s
    # idempotent re-run but without re-running the install
    # subcommand). The systemd unit doesn't need rewriting and the
    # data dir / postgres / caddy choices stay exactly as the user
    # left them.
    #
    # Still best-effort (a failed restart doesn't abort the update),
    # but no longer silent: without a restart the old process keeps
    # running the previous version while we report success.
    if ! os_restart_service 2>/dev/null; then
        warn "service restart failed — v$lat is on disk but the controller may still be running v$cur; restart it manually"
    fi

    # Update manifest so future passes know what version is current.
    manifest_init
    manifest_set sultix_version "$lat"
    manifest_set last_updated_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)"

    hr
    ok "updated to v$lat"
    print_paths_summary "$DATA_DIR"
    hr
}

# _sultix_version_gt A B — succeeds (status 0) when dotted version A
# is strictly newer than B. Fields are compared numerically, left to
# right (so 1.10.0 > 1.9.0); a leading "v" is ignored and a missing
# field counts as 0. Only the leading digits of each field take part
# in the numeric pass; when every field ties, the result falls back
# to a plain string comparison (so 1.2.3-dev still sorts above 1.2.3).
_sultix_version_gt() {
    local lhs="${1#v}" rhs="${2#v}"
    local -a lf rf
    IFS=. read -r -a lf <<<"$lhs"
    IFS=. read -r -a rf <<<"$rhs"
    local n=${#lf[@]} i x y
    if [ "${#rf[@]}" -gt "$n" ]; then
        n=${#rf[@]}
    fi
    for ((i = 0; i < n; i++)); do
        x="${lf[i]:-0}"
        y="${rf[i]:-0}"
        # Keep only the leading run of digits ("3-rc1" → "3").
        x="${x%%[!0-9]*}"
        y="${y%%[!0-9]*}"
        # 10# forces base 10 so "08" isn't parsed as bad octal.
        x=$((10#${x:-0}))
        y=$((10#${y:-0}))
        if [ "$x" -gt "$y" ]; then
            return 0
        fi
        if [ "$x" -lt "$y" ]; then
            return 1
        fi
    done
    [ "$lhs" \> "$rhs" ]
}

# main — top-level flow: resolve the data dir, pick the run mode from
# installed-vs-latest version (fresh install, update, modify, current,
# dev build), and for a fresh/forced install run the full bootstrap.
main() {
    detect_os
    manifest_relocate_for_os
    validate_args

    # Snapshot the operator's explicit --data-dir choice (if any)
    # before any defaulting kicks in. Used below to detect "re-run
    # with a different path" and offer migration without ambiguity.
    local USER_DATA_DIR="$DATA_DIR"
    local EXISTING_DATA_DIR
    EXISTING_DATA_DIR="$(read_existing_data_dir)"

    # Legacy install detection. Pre-2026-04-29 installs put data
    # under /var/lib/sultix without writing /etc/sultix/config.yaml
    # at all (config lived at /opt/sultix/.controller/config.yaml,
    # the hidden sub-dir of the binary's home). When we re-run
    # install.sh on such a host, EXISTING_DATA_DIR is empty (no
    # /etc/sultix/config.yaml to read) so DATA_DIR falls through to
    # the new default (/opt/sultix) and the operator's data
    # silently disconnects.
    #
    # The signature: /var/lib/sultix has master.key OR sultix.db.
    # If found, treat /var/lib/sultix as the live data root and
    # skip the default-fallback branch. The operator can override
    # by passing --data-dir explicitly (caught by USER_DATA_DIR
    # being non-empty); we only auto-detect when no flag was given.
    #
    # bodaay.org prod regression 2026-04-29: silent default to
    # /opt/sultix on a re-run minted a fresh master.key + device-CA
    # at the new path; agents/hosts/keys all "vanished" because the
    # running controller was reading the wrong key. Auto-detect is
    # the fix; the boot-time guard in cmd/sultix/controller_web.go
    # is the defense-in-depth safety net.
    local LEGACY_DATA_DIR=""
    if [ "$OS" = "linux" ] && [ -z "$EXISTING_DATA_DIR" ] && [ -z "$USER_DATA_DIR" ]; then
        if sudo test -f /var/lib/sultix/master.key 2>/dev/null \
           || sudo test -f /var/lib/sultix/db/sultix.db 2>/dev/null; then
            LEGACY_DATA_DIR="/var/lib/sultix"
            warn "legacy data detected at /var/lib/sultix — keeping it as data_dir on this upgrade"
            warn "(pre-2026-04-29 installs put data here; new default /opt/sultix would silently strand it)"
            warn "to migrate, re-run interactively + pick option [6] Move data dir on the modify menu"
            DATA_DIR="$LEGACY_DATA_DIR"
            EXISTING_DATA_DIR="$LEGACY_DATA_DIR"
        fi
    fi

    # Data dir resolution. Precedence:
    #   1. --data-dir CLI flag (validate_args already populated DATA_DIR)
    #   2. Legacy /var/lib/sultix detected (above)
    #   3. Existing /etc/sultix/config.yaml's data_dir field — so a
    #      re-run on an installed host always honors the operator's
    #      original choice without asking again
    #   4. Per-OS default (/opt/sultix on linux, ~/Library/… on darwin)
    if [ -z "$DATA_DIR" ]; then
        DATA_DIR="$EXISTING_DATA_DIR"
    fi
    if [ -z "$DATA_DIR" ]; then
        DATA_DIR="$(os_default_data_dir)"
    fi

    # Banner first — runs on every entry point (fresh, update, modify,
    # current, dev). Includes the Android download URL so a user who
    # came in for the controller also notices the mobile app exists.
    print_banner

    case "$OS" in
        linux)
            if [ "$PKG" != "apt" ]; then
                fail "linux: only apt-based distros (Ubuntu/Debian) are supported — $DISTRO coming soon"
            fi
            ;;
        darwin)
            # Refuse root on macOS — the LaunchAgent runs in user
            # context and writes to ~/Library. A root invocation
            # would land plist + state in /var/root and make the
            # eventual login-time start fail to find them.
            if [ "$(id -u)" = "0" ]; then
                fail "macOS: don't run with sudo. The script doesn't need it — binary lands in ~/.local/bin and the LaunchAgent runs in your user session."
            fi
            ;;
        *)
            fail "v1 supports Linux (Ubuntu/Debian) and macOS only — $OS coming soon"
            ;;
    esac

    # ── version branch: fresh install / update / current / dev ──────
    local cur lat
    cur="$(current_version)"
    lat="$(latest_version)" || fail "could not fetch $LATEST_JSON_URL — check network"
    [ -n "$lat" ] || fail "latest version empty in $LATEST_JSON_URL"

    # If sultix is already installed, ALWAYS print the path summary
    # before branching. The user's most-asked question after every
    # install/update is "where the hell did you put X" — so just
    # tell them, every run, regardless of which branch fires next.
    if [ -n "$cur" ]; then
        hr
        # Version goes through %s, not the format string, so a stray
        # '%' or backslash in it can't be interpreted by printf.
        printf "${C_CYAN}sultix is installed${C_RESET} (v%s)\n" "$cur"
        print_paths_summary "$DATA_DIR"
        hr
    fi

    # Re-run with --data-dir=NEW that differs from the persisted
    # path → run migration as a one-shot, exit when done. Stop
    # service, mv data, chown, rewrite unit + config, restart.
    # The migration is the operator's whole goal in this case;
    # skip the version-branching that would otherwise fire next.
    # Skipped when no existing install (USER_DATA_DIR just becomes
    # the fresh-install target instead).
    if [ -n "$cur" ] && [ -n "$USER_DATA_DIR" ] && [ -n "$EXISTING_DATA_DIR" ] \
       && [ "$USER_DATA_DIR" != "$EXISTING_DATA_DIR" ]; then
        warn "data-dir mismatch: installed at $EXISTING_DATA_DIR, requested $USER_DATA_DIR"
        if [ "$INTERACTIVE" = "1" ]; then
            confirm "Move data from $EXISTING_DATA_DIR to $USER_DATA_DIR now?" || { say "aborted"; exit 0; }
        elif [ "$ASSUME_YES" != "1" ]; then
            fail "data-dir mismatch in unattended run; rerun with --yes to confirm migration"
        fi
        # The migrate_data_dir helper relies on `sultix install
        # --data-dir=` which only exists on post-2026-04-29
        # binaries. If we're upgrading from an older version,
        # bring the binary forward first so the install subcommand
        # understands the flag.
        if [ "$cur" != "$lat" ]; then
            say "bringing binary to v$lat first so the install subcommand supports --data-dir"
            fetch_binary "$lat"
            fetch_agent_image "$lat"
        fi
        migrate_data_dir "$EXISTING_DATA_DIR" "$USER_DATA_DIR"
        DATA_DIR="$USER_DATA_DIR"
        hr
        print_paths_summary "$DATA_DIR"
        hr
        exit 0
    fi

    # Six cases:
    #   - no cur                  → fresh install (interactive walkthrough)
    #   - cur > lat               → "you have a newer build" + exit (dev)
    #   - cur < lat               → update_only (one-prompt confirm + swap)
    #   - cur = lat, --force      → fresh-install reinstall
    #   - cur = lat, interactive  → modify_mode (offer postgres/caddy/admin-pw)
    #   - cur = lat, unattended   → "current" + exit
    #
    # "Newer" is decided field-by-field numerically. A plain string
    # comparison ranks 1.9.0 above 1.10.0, which would report every
    # 1.9.x host as a dev build and never update it.
    if [ -z "$cur" ]; then
        :   # fresh install — fall through
    elif _sultix_version_gt "$cur" "$lat"; then
        warn "installed v$cur is NEWER than latest v$lat (dev build?). Skipping."
        exit 0
    elif [ "$cur" != "$lat" ]; then
        # Newer release available → update_only. Skips bootstrap
        # (admin password, master.key, postgres, caddy) — those
        # were configured on first install and stay as-is.
        update_only "$cur" "$lat"
        return
    elif [ "$FORCE" = "1" ]; then
        say "v$cur is current; --force given, will reinstall from scratch"
        # Force = treat as fresh-install reinstall. Falls through.
    elif [ "$INTERACTIVE" = "1" ]; then
        # Same version + interactive → modify mode. User reruns to
        # add components or reset the admin password without doing
        # a full reinstall.
        modify_mode "$cur"
        return
    else
        ok "sultix v$cur is current — nothing to do"
        say "  rerun interactively to modify (add postgres / caddy / reset admin pw)"
        say "  rerun with --force to reinstall from scratch"
        exit 0
    fi

    # ── fresh install / forced reinstall: interactive walkthrough ───
    # Show the planned paths up front so the user can confirm before
    # we touch the system. Bind address used to be in this list but
    # was misleading — the controller binds 0.0.0.0 by default; the
    # bind setting now lives in /etc/sultix/config.yaml's `bind`
    # field (commented stub written by `sultix install`).
    hr
    printf "${C_CYAN}sultix installer${C_RESET}\n"
    printf "  os         %s/%s (%s %s)\n" "$OS" "$ARCH" "$DISTRO" "$DISTRO_VERSION"
    print_paths_summary "$DATA_DIR"
    hr

    # Prompt for unset choices in interactive mode. Flags from CLI
    # win — passing --backend=postgres skips the backend prompt.
    # On macOS only sqlite + no-proxy are supported (validate_args
    # already rejected anything else from CLI), so skip both prompts.
    if [ "$OS" != "darwin" ]; then
        [ "$BACKEND" = "sqlite" ] && [ "$INTERACTIVE" = "1" ] && prompt_backend
        [ "$PROXY"   = "none"   ] && [ "$INTERACTIVE" = "1" ] && prompt_proxy
    fi
    prompt_admin_password

    hr
    printf "  backend    %s\n" "$BACKEND"
    printf "  proxy      %s%s\n" "$PROXY" \
        "$([ -n "$PROXY_DOMAIN" ] && printf " (%s)" "$PROXY_DOMAIN")"
    hr
    confirm "Proceed?" || { say "aborted"; exit 0; }

    # ── manifest: record what we placed so uninstall doesn't stomp
    # on user-owned packages we found pre-existing.
    manifest_init
    manifest_set sultix_version "$lat"
    manifest_set installed_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)"

    # ── docker (always — agent containers need it) ──────────────────
    os_install_docker

    # ── fetch + install binary ──────────────────────────────────────
    fetch_binary "$lat"

    # ── fetch + load agent docker image ─────────────────────────────
    # Without this, the controller can't spawn agents — image-build
    # fails with "no Dockerfile at /opt/sultix-agent". The release
    # pipeline ships per-arch gzipped tarballs alongside binaries.
    fetch_agent_image "$lat"

    # ── master.key import (must happen BEFORE first boot) ───────────
    case "$MASTER_KEY_SOURCE" in
        auto)
            say "master.key will be auto-generated on first boot"
            ;;
        path:*)
            local src="${MASTER_KEY_SOURCE#path:}"
            [ -r "$src" ] || fail "master.key source not readable: $src"
            osudo install -d -m 0700 "$DATA_DIR"
            osudo install -m 0600 "$src" "$DATA_DIR/master.key"
            ok "imported master.key from $src"
            ;;
        env)
            ok "controller will read SULTIX_MASTER_KEY at runtime"
            ;;
        *)
            fail "--master-key must be auto, path:/foo, or env"
            ;;
    esac

    # ── service install (writes systemd unit, creates user, starts) ─
    os_install_unit

    # ── postgres if requested ───────────────────────────────────────
    if [ "$BACKEND" = "postgres" ]; then
        linux_setup_postgres
    fi

    # ── caddy if requested ──────────────────────────────────────────
    if [ "$PROXY" = "caddy" ]; then
        linux_setup_caddy
    fi

    # ── admin password ──────────────────────────────────────────────
    # On macOS the LaunchAgent boots the controller asynchronously
    # — reset-password needs the admin user row, which the controller
    # creates as part of its first-boot bootstrap. Wait for the HTTP
    # port to come up before we try the CLI write. Linux's systemd
    # path runs reset-password concurrently with the service start
    # without issue (Type=simple + DB locking), so only darwin needs
    # the wait.
    if [ "$OS" = "darwin" ]; then
        say "waiting for controller to come up on $BIND"
        local i=0
        until curl -fsS --max-time 1 "http://$BIND/api/v3/health" >/dev/null 2>&1 \
              || curl -fsS --max-time 1 "http://$BIND/" >/dev/null 2>&1; do
            i=$((i + 1))
            [ "$i" -gt 30 ] && { warn "controller didn't reach $BIND in 30s — check $DARWIN_LOG"; break; }
            sleep 1
        done
    fi
    bootstrap_admin_password

    # ── final summary ───────────────────────────────────────────────
    local ui_url="http://localhost:3000"
    if [ "$PROXY" = "caddy" ] && [ -n "$PROXY_DOMAIN" ]; then
        ui_url="https://$PROXY_DOMAIN"
    fi
    hr
    ok "sultix installed"
    print_paths_summary "$DATA_DIR"
    printf "  admin UI   %s\n" "$ui_url"
    printf "  admin user %s\n" "$ADMIN_USER"
    printf "  admin pw   %s\n" "${ADMIN_PASSWORD_DISPLAY:-sultix00 (DEFAULT — change immediately in Settings)}"
    printf "  backend    %s\n" "$BACKEND"
    [ "$PROXY" = "caddy" ] && printf "  caddy      /etc/caddy/sites-available/sultix.caddyfile\n"
    hr
    warn "master.key + devca/ are NOT in DB backups. Copy them"
    warn "alongside any .sultixdb file when migrating to a new host."
}

# Script entry point: run main, forwarding the script's arguments.
main "$@"
