Add security-updates Ansible role for automated patching

Comprehensive security update automation:

1. Debian OS (unattended-upgrades) — tightened to security-only:
   - Removed general Debian updates (prevents feature/breaking changes)
   - Only Debian-Security origins auto-installed
   - Email admin on every upgrade via ops@performancewest.net
   - Auto-reboot at 4 AM if kernel update requires it
   - needrestart auto-restarts services after library updates

2. Docker CE — major version guard:
   - Patch updates within pinned major version auto-applied
   - Major version jumps held + admin alerted for manual review
   - docker-ce, docker-ce-cli, containerd.io all version-guarded

3. Container base images — daily at 3:30 AM:
   - Pulls latest base images for all docker-compose services
   - Compares image digests — only rebuilds if changed
   - Restarts only affected services (not full stack)
   - Alerts admin on rebuild failures requiring manual intervention
   - Covers both prod and dev compose projects

4. k3s — weekly Sunday at 3:45 AM:
   - Patch updates within current minor auto-applied
   - Minor/major upgrades alert admin for manual review
   - Verifies node Ready status after update
   - Alerts on failures with investigation instructions

5. Admin notifications via SMTP:
   - [INFO] for successful patches
   - [WARNING] for available major upgrades needing review
   - [CRITICAL] for failures requiring immediate intervention
   - Falls back to syslog if SMTP unavailable

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
justin 2026-04-30 01:24:57 -05:00
parent 611b8a9600
commit 97e8664cbf
13 changed files with 536 additions and 6 deletions

View file

@ -32,3 +32,4 @@
- worker-crons
- shkeeper
- nginx
- security-updates

View file

@ -72,15 +72,24 @@
enabled: true
state: started
- name: Configure unattended-upgrades — origins
- name: Configure unattended-upgrades — security only
ansible.builtin.copy:
content: |
// Automatically install security updates and stable updates
// SECURITY UPDATES ONLY — no feature upgrades
// General Debian updates excluded to prevent breaking changes
Unattended-Upgrade::Origins-Pattern {
"origin=Debian,codename=${distro_codename},label=Debian";
"origin=Debian,codename=${distro_codename},label=Debian-Security";
"origin=Debian,codename=${distro_codename}-security,label=Debian-Security";
"origin=Debian,codename=${distro_codename}-updates,label=Debian";
// Docker CE security patches (from download.docker.com repo)
"origin=Docker,codename=${distro_codename},label=Docker CE";
};
// Never auto-upgrade these — require manual intervention
Unattended-Upgrade::Package-Blacklist {
"docker-ce";
"docker-ce-cli";
"containerd.io";
"k3s";
};
// Auto-remove unused kernel packages and dependencies
@ -91,12 +100,17 @@
// Reboot at 4 AM if a kernel update requires it
Unattended-Upgrade::Automatic-Reboot "true";
Unattended-Upgrade::Automatic-Reboot-Time "04:00";
Unattended-Upgrade::Automatic-Reboot-WithUsers "false";
// Email notification (optional — only if mail is configured)
// Unattended-Upgrade::Mail "root";
// Email admin on upgrades and errors
Unattended-Upgrade::Mail "{{ smtp_admin_email }}";
Unattended-Upgrade::MailReport "on-change";
// Log to syslog
Unattended-Upgrade::SyslogEnable "true";
// Split the upgrade into minimal steps so it can be interrupted safely (e.g. by a shutdown)
Unattended-Upgrade::MinimalSteps "true";
dest: /etc/apt/apt.conf.d/50unattended-upgrades
owner: root
group: root

View file

@ -0,0 +1,32 @@
---
# ── Security Updates Role ────────────────────────────────────────────────────
# Default variables for the security-updates role.
# Handles: Docker CE patches, container base image updates, k3s patches
# Principle: security patches auto-applied; feature/major upgrades blocked
# Admin email for alerts requiring manual intervention
security_admin_email: "{{ smtp_admin_email }}"
# SMTP settings rendered into the pw-security-alert script, which sends mail
# with Python's smtplib and falls back to syslog (logger) if sending fails
security_smtp_host: "{{ smtp_host }}"
security_smtp_port: "{{ smtp_port }}"
security_smtp_user: "{{ smtp_user }}"
security_smtp_pass: "{{ smtp_pass }}"
security_smtp_from: "{{ smtp_from }}"
# Docker container update schedule (hour/minute of the systemd OnCalendar timer)
# Default: 3:30 AM daily, i.e. before the unattended-upgrades reboot at 4 AM
# NOTE(review): intended to run after unattended-upgrades — confirm the
# apt-daily-upgrade timer actually fires before 3:30 on the target hosts
container_update_hour: "3"
container_update_minute: "30"
# k3s update check schedule (hour/minute of the weekly Sunday timer)
k3s_update_hour: "3"
k3s_update_minute: "45"
# Project directories to scan for docker-compose.yml
compose_projects:
- "{{ project_dir }}"
- "{{ dev_project_dir }}"
# Docker major version pin (only allow patch updates within this major)
# e.g. "27" means 27.x.y patches are auto-applied, 28.x requires manual
docker_major_version_pin: "27"

View file

@ -0,0 +1,16 @@
---
# Handlers for the security-updates role. They are notified by the unit-file
# template tasks and run in the order listed here.

# Make systemd re-read unit files after a service or timer file changed.
- name: Reload systemd
  ansible.builtin.systemd:
    daemon_reload: true

# Start the container update timer now and enable it at boot.
- name: Enable container update timer
  ansible.builtin.systemd:
    name: pw-container-update.timer
    state: started
    enabled: true

# Start the k3s update timer now and enable it at boot.
- name: Enable k3s update timer
  ansible.builtin.systemd:
    name: pw-k3s-update.timer
    state: started
    enabled: true

View file

@ -0,0 +1,122 @@
---
# ══════════════════════════════════════════════════════════════════════════════
# Security Updates Role
#
# 1. Configures apt to allow Docker CE security patches (not major upgrades)
# 2. Deploys a container image update script (pulls new base images, rebuilds
# if changed, restarts affected services)
# 3. Deploys a k3s patch update script (stays on current channel, patch only)
# 4. Sets up systemd timers for all of the above
# 5. Sends admin email when manual intervention is required
# ══════════════════════════════════════════════════════════════════════════════
# ── 1. Docker CE: pin major version, allow patch updates ────────────────────
# Holds docker-ce/docker-ce-cli/containerd.io unless the installed docker-ce
# major version equals docker_major_version_pin, in which case any hold is
# released. The task always reports "ok" (changed_when: false).
- name: Install apt-mark hold for Docker CE (prevent major version jumps)
  ansible.builtin.shell: |
    # docker-ce versions carry a Debian epoch (e.g. "5:27.3.1-1~debian.12~bookworm").
    # Strip it first; otherwise the leading number is the epoch ("5"), the
    # comparison with the pin never matches, and the packages are always held.
    current=$(dpkg-query -W -f='${Version}' docker-ce 2>/dev/null | sed -E 's/^[0-9]+://' | grep -oE '^[0-9]+' || echo "")
    if [ -n "$current" ] && [ "$current" = "{{ docker_major_version_pin }}" ]; then
      # Same major — ensure NOT held so patches flow through unattended-upgrades
      apt-mark unhold docker-ce docker-ce-cli containerd.io 2>/dev/null || true
    else
      # Different major (or docker-ce not installed) — hold to prevent auto-upgrade
      apt-mark hold docker-ce docker-ce-cli containerd.io 2>/dev/null || true
    fi
  changed_when: false
# Render the Docker version guard template to a root-owned executable.
- name: Deploy Docker version guard script
  ansible.builtin.template:
    dest: /usr/local/bin/pw-docker-version-guard
    src: docker-version-guard.sh.j2
    mode: "0755"
    owner: root
    group: root
# ── 2. Container base image security updates ───────────────────────────────
# Installs the update script plus the systemd service/timer pair that runs it.
# Unit-file changes trigger a daemon reload; a timer change also (re)enables it.
- name: Deploy container image update script
  ansible.builtin.template:
    dest: /usr/local/bin/pw-container-security-update
    src: container-security-update.sh.j2
    mode: "0755"
    owner: root
    group: root

- name: Deploy container update systemd service
  ansible.builtin.template:
    dest: /etc/systemd/system/pw-container-update.service
    src: pw-container-update.service.j2
    mode: "0644"
    owner: root
    group: root
  notify:
    - Reload systemd

- name: Deploy container update systemd timer
  ansible.builtin.template:
    dest: /etc/systemd/system/pw-container-update.timer
    src: pw-container-update.timer.j2
    mode: "0644"
    owner: root
    group: root
  notify:
    - Reload systemd
    - Enable container update timer
# ── 3. k3s patch updates ───────────────────────────────────────────────────
# Installs the k3s patch script plus the systemd service/timer pair that runs
# it. Unit-file changes trigger a daemon reload; a timer change also
# (re)enables the timer.
- name: Deploy k3s patch update script
  ansible.builtin.template:
    dest: /usr/local/bin/pw-k3s-security-update
    src: k3s-security-update.sh.j2
    mode: "0755"
    owner: root
    group: root

- name: Deploy k3s update systemd service
  ansible.builtin.template:
    dest: /etc/systemd/system/pw-k3s-update.service
    src: pw-k3s-update.service.j2
    mode: "0644"
    owner: root
    group: root
  notify:
    - Reload systemd

- name: Deploy k3s update systemd timer
  ansible.builtin.template:
    dest: /etc/systemd/system/pw-k3s-update.timer
    src: pw-k3s-update.timer.j2
    mode: "0644"
    owner: root
    group: root
  notify:
    - Reload systemd
    - Enable k3s update timer
# ── 4. Admin alert helper ──────────────────────────────────────────────────
# The rendered script contains the SMTP password in clear text, so it must be
# readable by root only. Its callers (the update scripts) run as root.
- name: Deploy admin alert script
  ansible.builtin.template:
    src: pw-security-alert.sh.j2
    dest: /usr/local/bin/pw-security-alert
    owner: root
    group: root
    mode: "0700"
# ── 5. Ensure needrestart is installed for library/service detection ───────
- name: Install needrestart for detecting outdated services
  ansible.builtin.apt:
    state: present
    name: needrestart

# Drop-in config: restart mode 'a' (automatic) and a blacklist that leaves
# docker, containerd and k3s services untouched.
- name: Configure needrestart — auto-restart services, never interactive
  ansible.builtin.copy:
    dest: /etc/needrestart/conf.d/pw-security.conf
    mode: "0644"
    owner: root
    group: root
    content: |
      # Auto-restart services after library updates (no prompts)
      $nrconf{restart} = 'a';
      # Don't restart these (handled by our own scripts)
      $nrconf{blacklist_rc} = [qr(^docker), qr(^containerd), qr(^k3s)];

View file

@ -0,0 +1,112 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════════════════════════════════
# Container Security Update — Performance West
#
# For every configured compose project:
#   1. Finds services that declare a build section, pulls the base image(s)
#      named in their Dockerfile, and rebuilds + restarts a service only when
#      one of its base images changed.
#   2. Pulls pre-built images for the whole project and re-runs `up -d` when
#      the pull output reports downloaded layers.
# Sends an admin alert listing rebuilt services and a critical alert listing
# services whose rebuild or restart failed.
#
# NOTE: this file is a Jinja2 template. Avoid the bash array-length expansion
# (dollar, brace, hash): Jinja2 reads brace-hash as the start of a template
# comment and fails to render. Emptiness is tested with "[*]-" instead.
#
# Runs via systemd timer (pw-container-update.timer)
# ══════════════════════════════════════════════════════════════════════════════
set -euo pipefail

LOG_TAG="pw-container-update"
ALERT_SCRIPT="/usr/local/bin/pw-security-alert"
COMPOSE_PROJECTS=({{ compose_projects | map('quote') | join(' ') }})

# Log to syslog and to stdout (captured by the journal).
log() { logger -t "$LOG_TAG" "$*"; echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }

# Send an alert; a mailer failure must not abort the run under `set -e`.
send_alert() {
  "$ALERT_SCRIPT" "$@" || log "WARN: alert delivery failed: $1"
}

# Print the Dockerfile path of service $1, reading `docker compose config`
# YAML from stdin. Prints nothing when the service has no build section.
# The service name is passed as an argument, never interpolated into code.
dockerfile_for_service() {
  python3 -c '
import os, sys, yaml
cfg = yaml.safe_load(sys.stdin) or {}
build = (cfg.get("services") or {}).get(sys.argv[1], {}).get("build")
if isinstance(build, str):
    print(os.path.join(build, "Dockerfile"))
elif isinstance(build, dict):
    print(os.path.join(build.get("context", "."), build.get("dockerfile", "Dockerfile")))
' "$1" 2>/dev/null || true
}

# Print the distinct external base images referenced by Dockerfile $1.
# Takes the first non-flag argument of each FROM line (so --platform=... is
# skipped), ignores references to earlier build stages and ARG-substituted
# names starting with "$". Never fails, even when nothing matches.
base_images_of() {
  awk '
    toupper($1) == "FROM" {
      for (i = 2; i <= NF; i++) if ($i !~ /^--/) break
      if (i <= NF && !($i in stage)) print $i
      if (i + 2 <= NF && toupper($(i + 1)) == "AS") stage[$(i + 2)] = 1
    }
  ' "$1" | grep -v '^\$' | sort -u || true
}

updated_services=()
failed_services=()

for PROJECT_DIR in "${COMPOSE_PROJECTS[@]}"; do
  if [ ! -f "$PROJECT_DIR/docker-compose.yml" ]; then
    log "SKIP: No docker-compose.yml in $PROJECT_DIR"
    continue
  fi
  log "Checking $PROJECT_DIR for base image updates..."
  cd "$PROJECT_DIR"

  # Resolve the compose config once per project instead of once per service.
  compose_config=$(docker compose config 2>/dev/null || true)
  services=$(docker compose config --services 2>/dev/null || true)

  for svc in $services; do
    dockerfile=$(printf '%s\n' "$compose_config" | dockerfile_for_service "$svc")
    if [ -z "$dockerfile" ] || [ ! -f "$dockerfile" ]; then
      continue
    fi

    needs_rebuild=false
    for img in $(base_images_of "$dockerfile"); do
      if [ "$img" = "scratch" ]; then continue; fi
      # Pull and compare the local image ID before and after
      old_digest=$(docker image inspect "$img" --format '{{ '{{' }}.Id{{ '}}' }}' 2>/dev/null || echo "none")
      if ! docker pull "$img" --quiet >/dev/null 2>&1; then
        log "WARN: could not pull base image $img for service $svc"
        continue
      fi
      new_digest=$(docker image inspect "$img" --format '{{ '{{' }}.Id{{ '}}' }}' 2>/dev/null || echo "none2")
      if [ "$old_digest" != "$new_digest" ]; then
        log "UPDATE: Base image $img changed for service $svc"
        needs_rebuild=true
      fi
    done

    if $needs_rebuild; then
      log "Rebuilding $svc in $PROJECT_DIR..."
      # Both the build and the restart must succeed; either failure is
      # recorded so the critical alert below is still sent.
      if docker compose build --no-cache "$svc" 2>&1 | tail -5 \
          && docker compose up -d "$svc" 2>&1; then
        updated_services+=("$svc ($PROJECT_DIR)")
        log "OK: $svc rebuilt and restarted"
      else
        failed_services+=("$svc ($PROJECT_DIR)")
        log "FAIL: $svc rebuild or restart failed"
      fi
    fi
  done

  # Also pull pre-built images (postgres, redis, mariadb, etc.)
  pull_output=$(docker compose pull 2>&1 || true)
  if grep -q "Pull complete" <<<"$pull_output"; then
    log "Pre-built images updated in $PROJECT_DIR, restarting affected..."
    docker compose up -d 2>&1 | tail -5 \
      || log "WARN: docker compose up -d failed in $PROJECT_DIR"
  fi
done

# Clean up dangling images
docker image prune -f >/dev/null 2>&1 || true

# Report
if [ -n "${updated_services[*]-}" ]; then
  log "Updated services: ${updated_services[*]}"
  send_alert "Container Security Update Complete" \
    "The following services were rebuilt with updated base images:\n\n$(printf ' - %s\n' "${updated_services[@]}")\n\nAll services restarted successfully." \
    "info"
fi

if [ -n "${failed_services[*]-}" ]; then
  log "FAILED services: ${failed_services[*]}"
  send_alert "Container Rebuild FAILED — Manual Intervention Required" \
    "The following services failed to rebuild after base image security updates:\n\n$(printf ' - %s\n' "${failed_services[@]}")\n\nPlease SSH in and investigate:\n ssh -p {{ ssh_port }} {{ deploy_user }}@$(hostname -f)\n cd <project_dir> && docker compose build <service> && docker compose up -d <service>" \
    "critical"
fi

if [ -z "${updated_services[*]-}" ] && [ -z "${failed_services[*]-}" ]; then
  log "All container base images are current. No updates needed."
fi

View file

@ -0,0 +1,35 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════════════════════════════════
# Docker Version Guard — Performance West
#
# Checks if a Docker CE major version upgrade is available. If so, holds the
# current packages and alerts the admin. Patch updates within the pinned
# major version are allowed to flow through unattended-upgrades.
#
# Run by: apt pre-invoke hook or manually
# NOTE(review): this script calls `apt-get update`, which needs the apt lock;
# confirm it is not wired into a hook that runs while apt already holds it.
# ══════════════════════════════════════════════════════════════════════════════
set -euo pipefail

PINNED_MAJOR="{{ docker_major_version_pin }}"
ALERT_SCRIPT="/usr/local/bin/pw-security-alert"

# Print the upstream major version of a Debian package version string, or
# nothing when it cannot be parsed. Docker CE versions carry an epoch
# (e.g. "5:27.3.1-1~debian.12~bookworm"), which is dropped first so the
# result is "27" rather than the epoch "5".
major_of() {
  local v="${1#*:}"
  v="${v%%.*}"
  if [[ "$v" =~ ^[0-9]+$ ]]; then echo "$v"; fi
}

current_version=$(dpkg-query -W -f='${Version}' docker-ce 2>/dev/null || echo "0.0.0")
current_version="${current_version:-0.0.0}"
current_major=$(major_of "$current_version")
current_major="${current_major:-0}"

# Check what's available. A failed index refresh is not fatal: the cached
# candidate is used instead of aborting under `set -e`.
apt-get update -qq 2>/dev/null \
  || logger -t "pw-docker-guard" "apt-get update failed — using cached package lists"

available_version=$(apt-cache policy docker-ce 2>/dev/null | awk '/Candidate:/ {print $2}' || true)
available_major=$(major_of "$available_version")
# Unparsable or missing candidate: treat as "no major change".
available_major="${available_major:-$current_major}"

if [ "$available_major" != "$PINNED_MAJOR" ] && [ "$available_major" != "$current_major" ]; then
  # Major version jump detected — hold packages and alert admin
  apt-mark hold docker-ce docker-ce-cli containerd.io 2>/dev/null || true
  logger -t "pw-docker-guard" "Docker CE major upgrade available: $current_version -> $available_version (held)"
  "$ALERT_SCRIPT" "Docker CE Major Upgrade Available — Manual Review Required" \
    "A Docker CE major version upgrade is available but has been held:\n\n Current: $current_version (major $current_major)\n Available: $available_version (major $available_major)\n Pinned major: $PINNED_MAJOR\n\nMajor Docker upgrades may include breaking changes. Please review the changelog and upgrade manually:\n\n apt-mark unhold docker-ce docker-ce-cli containerd.io\n apt-get update && apt-get upgrade docker-ce docker-ce-cli containerd.io\n systemctl restart docker\n cd {{ project_dir }} && docker compose up -d" \
    "warning" \
    || logger -t "pw-docker-guard" "alert delivery failed"
else
  # Same major — ensure packages are NOT held (allow patch updates)
  apt-mark unhold docker-ce docker-ce-cli containerd.io 2>/dev/null || true
fi

View file

@ -0,0 +1,90 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════════════════════════════════
# k3s Security Patch Update — Performance West
#
# Checks for k3s patch updates within the current minor version. Installs
# patch updates automatically (e.g. v1.28.5 → v1.28.9). Alerts admin if
# a minor/major version upgrade is available (requires manual intervention).
#
# Versions are handled WITH their "+k3sN" build suffix throughout, because
# GitHub release tags include it; comparing a suffix-less installed version
# against a tag would never be equal and would reinstall k3s on every run.
#
# Runs via systemd timer (pw-k3s-update.timer)
# ══════════════════════════════════════════════════════════════════════════════
set -euo pipefail

LOG_TAG="pw-k3s-update"
ALERT_SCRIPT="/usr/local/bin/pw-security-alert"

# Log to syslog and to stdout (captured by the journal).
log() { logger -t "$LOG_TAG" "$*"; echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }

# Send an alert; a mailer failure must not abort the run under `set -e`.
send_alert() {
  "$ALERT_SCRIPT" "$@" || log "WARN: alert delivery failed: $1"
}

# Print the installed k3s version without the leading "v" (e.g.
# "1.28.5+k3s1"), or nothing if it cannot be determined.
installed_version() {
  k3s --version 2>/dev/null | grep -m1 -oP 'v\K\d+\.\d+\.\d+(\+k3s\d+)?' || true
}

# Succeeds when version $2 sorts strictly after version $1.
is_newer() {
  [ "$1" != "$2" ] && [ "$(printf '%s\n%s\n' "$1" "$2" | sort -V | tail -n1)" = "$2" ]
}

# Succeeds when at least one node exists and every node's STATUS is Ready
# (optionally followed by ",SchedulingDisabled" etc.). A plain grep for
# "Ready" would also match "NotReady".
nodes_ready() {
  kubectl get nodes --no-headers 2>/dev/null | \
    awk '$2 !~ /^Ready(,|$)/ { bad = 1 } END { exit (NR == 0 || bad) ? 1 : 0 }'
}

# Check if k3s is installed
if ! command -v k3s &>/dev/null; then
  log "k3s not installed — skipping"
  exit 0
fi

current_version=$(installed_version)
current_version="${current_version:-0.0.0}"
current_minor=$(grep -oP '^\d+\.\d+' <<<"$current_version")
log "Current k3s version: v$current_version (minor: $current_minor)"

# Query GitHub API for the highest stable release in the same minor channel.
# per_page=100 widens the window so older minors are still found; the minor
# is compared exactly (so "1.2" does not match "1.28") and is passed as an
# argument rather than interpolated into the Python source.
latest_patch=$(curl -sfL "https://api.github.com/repos/k3s-io/k3s/releases?per_page=100" 2>/dev/null | \
  python3 -c '
import json, re, sys
minor = sys.argv[1]
pattern = re.compile(r"^v(\d+)\.(\d+)\.(\d+)\+k3s(\d+)$")
best = None
for rel in json.load(sys.stdin):
    if rel.get("prerelease") or rel.get("draft"):
        continue
    m = pattern.match(rel.get("tag_name", ""))
    if not m or "%s.%s" % (m.group(1), m.group(2)) != minor:
        continue
    key = tuple(int(g) for g in m.groups())
    if best is None or key > best[0]:
        best = (key, m.group(0)[1:])
if best:
    print(best[1])
' "$current_minor" 2>/dev/null || echo "")

if [ -z "$latest_patch" ]; then
  log "Could not determine latest k3s patch version — skipping"
  exit 0
fi

# Also check for available minor/major upgrades
latest_stable=$(curl -sfL "https://api.github.com/repos/k3s-io/k3s/releases/latest" 2>/dev/null | \
  python3 -c "import sys,json; print(json.load(sys.stdin).get('tag_name','').lstrip('v'))" 2>/dev/null || echo "")
latest_stable_minor=$(grep -oP '^\d+\.\d+' <<<"$latest_stable" || echo "$current_minor")

if [ "$latest_stable_minor" != "$current_minor" ] && [ -n "$latest_stable" ]; then
  log "k3s minor upgrade available: v$current_version → v$latest_stable"
  send_alert "k3s Minor Upgrade Available — Manual Review Required" \
    "A k3s minor version upgrade is available:\n\n Current: v$current_version\n Latest stable: v$latest_stable\n\nMinor upgrades may include API changes. Review the changelog and upgrade:\n\n curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=v$latest_stable sh -\n systemctl restart k3s\n kubectl get nodes" \
    "warning"
fi

# Apply patch update only if it is actually newer than what is installed
if is_newer "$current_version" "$latest_patch"; then
  log "Applying k3s patch update: v$current_version → v$latest_patch"
  # A failed restart is treated like a failed install so it is alerted on
  # instead of silently aborting the script under `set -e`.
  if curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="v$latest_patch" sh - 2>&1 \
      && systemctl restart k3s; then
    # Give the API server up to a minute to come back before judging the node
    ready=false
    for _ in 1 2 3 4 5 6; do
      sleep 10
      if nodes_ready; then ready=true; break; fi
    done

    new_version=$(installed_version)
    new_version="${new_version:-unknown}"
    log "k3s updated to v$new_version"

    if $ready; then
      log "k3s node is Ready after patch update"
      # Only the node status is verified, so the alert claims nothing about pods
      send_alert "k3s Security Patch Applied" \
        "k3s updated from v$current_version to v$new_version.\nNode status: Ready." \
        "info"
    else
      log "WARNING: k3s node not Ready after update"
      send_alert "k3s Patch Applied — Node Not Ready" \
        "k3s updated from v$current_version to v$new_version but the node is not in Ready state.\n\nPlease investigate:\n ssh -p {{ ssh_port }} {{ deploy_user }}@$(hostname -f)\n kubectl get nodes\n kubectl get pods -A" \
        "critical"
    fi
  else
    log "FAIL: k3s patch update failed"
    send_alert "k3s Patch Update FAILED" \
      "Failed to update k3s from v$current_version to v$latest_patch.\n\nPlease investigate:\n ssh -p {{ ssh_port }} {{ deploy_user }}@$(hostname -f)\n journalctl -u k3s --since '1 hour ago'" \
      "critical"
  fi
else
  log "k3s v$current_version is the latest patch in the $current_minor channel"
fi

View file

@ -0,0 +1,12 @@
# One-shot unit that runs the container base-image update script as root.
# Needs the Docker daemon and is ordered after it; output goes to the journal.
[Unit]
Description=Performance West Container Security Update
Requires=docker.service
After=docker.service

[Service]
Type=oneshot
User=root
ExecStart=/usr/local/bin/pw-container-security-update
# Allow up to 30 minutes for pulls and rebuilds
TimeoutStartSec=1800
StandardOutput=journal
StandardError=journal

View file

@ -0,0 +1,10 @@
# Daily timer; the hour and minute are rendered from container_update_hour
# and container_update_minute.
[Unit]
Description=Daily container security update check

[Timer]
OnCalendar=*-*-* {{ container_update_hour }}:{{ container_update_minute }}:00
# Spread the start by up to 5 minutes; catch up after downtime
RandomizedDelaySec=300
Persistent=true

[Install]
WantedBy=timers.target

View file

@ -0,0 +1,11 @@
# One-shot unit that runs the k3s patch update script as root.
# Ordered after k3s.service; output goes to the journal.
[Unit]
Description=Performance West k3s Security Patch Update
After=k3s.service

[Service]
Type=oneshot
User=root
ExecStart=/usr/local/bin/pw-k3s-security-update
# Allow up to 10 minutes for the download, install and restart
TimeoutStartSec=600
StandardOutput=journal
StandardError=journal

View file

@ -0,0 +1,10 @@
# Weekly (Sunday) timer; the hour and minute are rendered from
# k3s_update_hour and k3s_update_minute.
[Unit]
Description=Weekly k3s security patch check

[Timer]
OnCalendar=Sun *-*-* {{ k3s_update_hour }}:{{ k3s_update_minute }}:00
# Spread the start by up to 10 minutes; catch up after downtime
RandomizedDelaySec=600
Persistent=true

[Install]
WantedBy=timers.target

View file

@ -0,0 +1,65 @@
#!/usr/bin/env bash
# ══════════════════════════════════════════════════════════════════════════════
# Security Alert Mailer — Performance West
#
# Usage: pw-security-alert "Subject" "Body message" "level"
#   Subject  short summary; prefixed with the level and suffixed with the host
#   Body     message text; literal "\n" sequences are expanded to newlines
#   level    info | warning | critical (anything else is treated as info)
#
# Sends email to admin via SMTP. Falls back to logger if SMTP unavailable.
# Exits 0 in both cases so a mail outage never fails the calling script.
#
# This file contains the SMTP password — keep it readable by root only.
# ══════════════════════════════════════════════════════════════════════════════
set -euo pipefail

SUBJECT="${1:-Security Alert}"
BODY="${2:-No details provided.}"
LEVEL="${3:-info}"

# Values are shell-quoted by the template so characters such as quotes or
# dollar signs (e.g. in the password) cannot break or alter the assignment.
TO={{ security_admin_email | quote }}
FROM={{ security_smtp_from | quote }}
SMTP_HOST={{ security_smtp_host | quote }}
SMTP_PORT={{ security_smtp_port | quote }}
SMTP_USER={{ security_smtp_user | quote }}
SMTP_PASS={{ security_smtp_pass | quote }}

SERVER_NAME=$(hostname -f 2>/dev/null || hostname)

# Prefix subject with level
case "$LEVEL" in
  critical) PREFIX="[CRITICAL]" ;;
  warning)  PREFIX="[WARNING]" ;;
  *)        PREFIX="[INFO]" ;;
esac
FULL_SUBJECT="$PREFIX $SUBJECT — $SERVER_NAME"

# Format body with server info (%b expands the callers' "\n" sequences)
FULL_BODY=$(printf '%b\n\n---\nServer: %s\nTime: %s\nLevel: %s\n' \
  "$BODY" "$SERVER_NAME" "$(date '+%Y-%m-%d %H:%M:%S %Z')" "$LEVEL")

# Send the message with Python's smtplib. All values are handed over through
# the environment and the heredoc is quoted, so nothing from the subject,
# body or credentials is ever interpreted as Python or shell code.
send_mail() {
  PW_TO="$TO" PW_FROM="$FROM" PW_HOST="$SMTP_HOST" PW_PORT="$SMTP_PORT" \
  PW_USER="$SMTP_USER" PW_PASS="$SMTP_PASS" \
  PW_SUBJECT="$FULL_SUBJECT" PW_BODY="$FULL_BODY" \
  python3 - <<'PYEOF'
import os
import smtplib
import sys
from email.mime.text import MIMEText

env = os.environ
msg = MIMEText(env["PW_BODY"])
msg["Subject"] = env["PW_SUBJECT"]
msg["From"] = env["PW_FROM"]
msg["To"] = env["PW_TO"]
try:
    port = int(env["PW_PORT"])
    # Port 465 speaks TLS from the first byte; other ports upgrade via STARTTLS
    if port == 465:
        server = smtplib.SMTP_SSL(env["PW_HOST"], port, timeout=30)
    else:
        server = smtplib.SMTP(env["PW_HOST"], port, timeout=30)
    with server as s:
        if port != 465:
            s.starttls()
        if env["PW_USER"]:
            s.login(env["PW_USER"], env["PW_PASS"])
        s.send_message(msg)
    print("Alert sent to " + env["PW_TO"])
except Exception as e:
    print(f"SMTP failed: {e}")
    sys.exit(1)
PYEOF
}

# Running the mailer as an `if` condition keeps `set -e` from killing the
# script on failure, so the syslog fallback below is actually reached.
if command -v python3 &>/dev/null && send_mail; then
  logger -t "pw-security-alert" -- "[$LEVEL] $SUBJECT — sent to $TO"
  exit 0
fi

# Fallback: log only
logger -t "pw-security-alert" -- "[$LEVEL] $SUBJECT (email delivery failed — logged only)"
logger -t "pw-security-alert" -- "$(printf '%b' "$BODY")"