From 980d90dd51f9414b60e6a41bdb6c04cc1b4d0b89 Mon Sep 17 00:00:00 2001 From: Lutz Finsterle Date: Sat, 21 Feb 2026 16:47:41 +0100 Subject: [PATCH] Initial commit --- CLAUDE.md | 76 ++++++++++ README.md | 113 +++++++++++++++ ansible/ansible.cfg | 11 ++ ansible/inventory/group_vars/all.yml | 32 ++++ .../inventory/group_vars/linux_servers.yml | 21 +++ ansible/inventory/group_vars/pis.yml | 23 +++ ansible/inventory/hosts.yml | 41 ++++++ ansible/playbooks/check.yml | 39 +++++ ansible/playbooks/deploy.yml | 34 +++++ ansible/playbooks/run-backup.yml | 24 +++ ansible/roles/docker-backup/defaults/main.yml | 2 + ansible/roles/docker-backup/tasks/main.yml | 16 ++ .../templates/docker-post-backup.sh.j2 | 26 ++++ .../templates/docker-pre-backup.sh.j2 | 23 +++ ansible/roles/image-backup/defaults/main.yml | 4 + ansible/roles/image-backup/handlers/main.yml | 10 ++ ansible/roles/image-backup/tasks/main.yml | 35 +++++ .../templates/image-backup.service.j2 | 13 ++ .../image-backup/templates/image-backup.sh.j2 | 53 +++++++ .../templates/image-backup.timer.j2 | 11 ++ ansible/roles/restic-client/defaults/main.yml | 16 ++ ansible/roles/restic-client/handlers/main.yml | 26 ++++ ansible/roles/restic-client/tasks/main.yml | 90 ++++++++++++ .../templates/restic-backup.service.j2 | 15 ++ .../templates/restic-backup.sh.j2 | 60 ++++++++ .../templates/restic-backup.timer.j2 | 12 ++ .../restic-client/templates/restic.env.j2 | 2 + ansible/vault.yml | 18 +++ docs/restore.md | 137 ++++++++++++++++++ docs/synology-setup.md | 92 ++++++++++++ scripts/openwrt-backup.sh | 35 +++++ scripts/synology-setup.sh | 60 ++++++++ 32 files changed, 1170 insertions(+) create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 ansible/ansible.cfg create mode 100644 ansible/inventory/group_vars/all.yml create mode 100644 ansible/inventory/group_vars/linux_servers.yml create mode 100644 ansible/inventory/group_vars/pis.yml create mode 100644 ansible/inventory/hosts.yml create mode 100644 
ansible/playbooks/check.yml create mode 100644 ansible/playbooks/deploy.yml create mode 100644 ansible/playbooks/run-backup.yml create mode 100644 ansible/roles/docker-backup/defaults/main.yml create mode 100644 ansible/roles/docker-backup/tasks/main.yml create mode 100644 ansible/roles/docker-backup/templates/docker-post-backup.sh.j2 create mode 100644 ansible/roles/docker-backup/templates/docker-pre-backup.sh.j2 create mode 100644 ansible/roles/image-backup/defaults/main.yml create mode 100644 ansible/roles/image-backup/handlers/main.yml create mode 100644 ansible/roles/image-backup/tasks/main.yml create mode 100644 ansible/roles/image-backup/templates/image-backup.service.j2 create mode 100644 ansible/roles/image-backup/templates/image-backup.sh.j2 create mode 100644 ansible/roles/image-backup/templates/image-backup.timer.j2 create mode 100644 ansible/roles/restic-client/defaults/main.yml create mode 100644 ansible/roles/restic-client/handlers/main.yml create mode 100644 ansible/roles/restic-client/tasks/main.yml create mode 100644 ansible/roles/restic-client/templates/restic-backup.service.j2 create mode 100644 ansible/roles/restic-client/templates/restic-backup.sh.j2 create mode 100644 ansible/roles/restic-client/templates/restic-backup.timer.j2 create mode 100644 ansible/roles/restic-client/templates/restic.env.j2 create mode 100644 ansible/vault.yml create mode 100644 docs/restore.md create mode 100644 docs/synology-setup.md create mode 100644 scripts/openwrt-backup.sh create mode 100644 scripts/synology-setup.sh diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..3713706 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,76 @@ +# HomeBackup Project Conventions + +## Purpose +Backup solution for home lab: 7 Raspberry Pis, OpenWrt router, Linux servers → Synology NAS. 
+ +## Architecture +- **Restic** via REST server (Docker on Synology) for daily incremental backups — HTTP basic auth, no SSH keys +- **dd over NFS** for monthly full SD card images +- **Ansible** for deployment and management — password auth via sshpass +- **OpenWrt** pushes config to a designated Pi; that Pi includes it in its restic backup + +## Key Decisions +- Shared restic repo (all devices, max dedup); snapshots identified by hostname +- REST server backend — avoids SSH key management for backup transport +- Retention per host: 7 daily, 4 weekly, 6 monthly +- Docker containers stopped before backup, restarted after (consistent volumes) +- Image backups kept for 3 months on Synology NFS share + +## Directory Layout +``` +ansible/ + ansible.cfg + vault.yml # ansible-vault encrypted — never commit plaintext + inventory/ + hosts.yml + group_vars/ # vars scoped to groups + host_vars/ # per-device overrides + roles/ + restic-client/ # installs restic, systemd timer, repo init + docker-backup/ # pre/post hooks to stop/start containers + image-backup/ # monthly dd image via NFS + playbooks/ + deploy.yml # push backup stack to all devices + run-backup.yml # ad-hoc trigger + check.yml # verify repo health and timer status +scripts/ + openwrt-backup.sh # runs on OpenWrt router (ash/busybox compatible) + synology-setup.sh # one-time REST server bootstrap on Synology +docs/ + restore.md # recovery runbooks per device type + synology-setup.md # Synology setup guide +``` + +## Secrets +- All passwords in `ansible/vault.yml`, encrypted with `ansible-vault encrypt` +- Vault password kept outside the repo (e.g. 
in a password manager) +- Never commit an unencrypted vault.yml + +## Common Commands +```bash +# Deploy to all Pis +ansible-playbook ansible/playbooks/deploy.yml --ask-vault-pass + +# Deploy to a single host +ansible-playbook ansible/playbooks/deploy.yml --limit pi1 --ask-vault-pass + +# Trigger ad-hoc backup +ansible-playbook ansible/playbooks/run-backup.yml --ask-vault-pass + +# Check backup health +ansible-playbook ansible/playbooks/check.yml --ask-vault-pass + +# Encrypt vault after editing +ansible-vault encrypt ansible/vault.yml +``` + +## Adding a New Device +1. Add host to `ansible/inventory/hosts.yml` under the correct group +2. Add `ansible/inventory/host_vars/<hostname>.yml` for any overrides +3. Add password to vault if different from group default +4. Run deploy with `--limit <hostname>` + +## Requirements (Control Node) +- Ansible >= 2.12 +- `sshpass` package installed (`apt install sshpass`) +- Python 3 diff --git a/README.md b/README.md new file mode 100644 index 0000000..cbb0e79 --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +# HomeBackup + +Automated backup solution for a home lab: Raspberry Pis, OpenWrt router, Linux servers → Synology NAS. + +## What it does + +| What | How | When | +|---|---|---| +| Files, configs, Docker volumes | Restic → Synology REST server | Daily 02:00 | +| Full SD card image | dd → Synology NFS share | Monthly | +| OpenWrt router config | sysupgrade → Pi → Restic | Daily 02:30 | + +## Quick start + +### 1. Set up Synology + +See [docs/synology-setup.md](docs/synology-setup.md). +Run `bash scripts/synology-setup.sh` (fill in your values first). + +### 2. Fill in your inventory + +Edit `ansible/inventory/hosts.yml` — replace all `192.168.x.x` placeholders. +Add per-device overrides in `ansible/inventory/host_vars/<hostname>.yml` if needed. + +### 3. Configure secrets + +```bash +# Edit vault with your passwords (do NOT commit unencrypted) +vi ansible/vault.yml # fill in all CHANGEME values +ansible-vault encrypt ansible/vault.yml +``` + +### 4. 
Install Ansible on the control node + +```bash +apt install ansible sshpass # Debian/Ubuntu/Raspberry Pi OS +pip install ansible # alternative +``` + +### 5. Deploy + +```bash +cd ansible + +# Deploy to all devices +ansible-playbook playbooks/deploy.yml --ask-vault-pass + +# Deploy to a single Pi first (test run) +ansible-playbook playbooks/deploy.yml --limit pi1 --ask-vault-pass +``` + +### 6. Set up OpenWrt SSH key (one-time) + +```bash +# On the router +ssh root@ROUTER_IP +ssh-keygen -t ed25519 -f /etc/dropbear/backup_id -N "" +cat /etc/dropbear/backup_id.pub + +# Append the public key to authorized_keys on the receiver Pi +# (the Pi defined as openwrt_backup_receiver_host in group_vars/all.yml) +echo "PUBLIC_KEY_HERE" >> /home/pi/.ssh/authorized_keys + +# Edit scripts/openwrt-backup.sh — set RECEIVER_HOST and RECEIVER_USER +# Then test: +ssh -i /etc/dropbear/backup_id pi@RECEIVER_HOST "echo ok" +``` + +### 7. Verify + +```bash +# Check backup health on all Pis +ansible-playbook playbooks/check.yml --ask-vault-pass +``` + +## Directory layout + +``` +ansible/ + ansible.cfg + vault.yml # encrypted with ansible-vault + inventory/ + hosts.yml + group_vars/ + all.yml # Synology config, restic settings + pis.yml # Pi SSH auth and backup paths + linux_servers.yml + host_vars/ # per-device overrides (create as needed) + roles/ + restic-client/ # installs restic, systemd timer, repo init + docker-backup/ # pre/post hooks to stop/start containers + image-backup/ # monthly dd image via NFS + playbooks/ + deploy.yml + run-backup.yml + check.yml +scripts/ + openwrt-backup.sh # runs on OpenWrt (ash/busybox) + synology-setup.sh # one-time Synology bootstrap +docs/ + synology-setup.md + restore.md # recovery runbooks +``` + +## Restore + +See [docs/restore.md](docs/restore.md) for step-by-step recovery procedures. + +## Adding a new device + +1. Add to `ansible/inventory/hosts.yml` under the right group +2. Create `ansible/inventory/host_vars/<hostname>.yml` for overrides (optional) +3. 
Run: `ansible-playbook playbooks/deploy.yml --limit <hostname> --ask-vault-pass`
diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg
new file mode 100644
index 0000000..4dd1ce1
--- /dev/null
+++ b/ansible/ansible.cfg
@@ -0,0 +1,11 @@
+[defaults]
+inventory = inventory/
+roles_path = roles
+host_key_checking = False
+timeout = 30
+stdout_callback = yaml
+
+[ssh_connection]
+# Required for password auth via sshpass
+ssh_args = -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
+pipelining = True
diff --git a/ansible/inventory/group_vars/all.yml b/ansible/inventory/group_vars/all.yml
new file mode 100644
index 0000000..c56397d
--- /dev/null
+++ b/ansible/inventory/group_vars/all.yml
@@ -0,0 +1,34 @@
+---
+# ── Synology / REST server ────────────────────────────────────────────────────
+synology_host: "192.168.x.x" # REPLACE — must match nas ansible_host
+
+rest_server_port: 8000
+rest_server_user: "{{ vault_rest_server_user }}"
+rest_server_password: "{{ vault_rest_server_password }}"
+
+# Single shared repo — all devices, maximum dedup
+# Credentials are percent-encoded so characters such as '@' or ':' in the user
+# name or password cannot break the URL. NOTE(review): urlencode leaves '/' as-is.
+restic_repo: "rest:http://{{ rest_server_user | urlencode }}:{{ rest_server_password | urlencode }}@{{ synology_host }}:{{ rest_server_port }}/"
+
+# Repo encryption password — keep this safe, losing it = losing backups
+restic_repo_password: "{{ vault_restic_repo_password }}"
+
+# ── Retention policy ─────────────────────────────────────────────────────────
+restic_keep_daily: 7
+restic_keep_weekly: 4
+restic_keep_monthly: 6
+
+# ── Schedules (systemd OnCalendar syntax) ────────────────────────────────────
+backup_oncalendar: "*-*-* 02:00:00" # daily at 02:00, staggered by RandomizedDelaySec
+image_backup_oncalendar: "monthly"
+
+# ── NFS for image backup ──────────────────────────────────────────────────────
+nfs_image_share: "{{ synology_host }}:/volume1/images"
+nfs_mount_point: "/mnt/synology-images"
+
+# ── OpenWrt backup receiver ───────────────────────────────────────────────────
+# One Pi receives OpenWrt config backups via SCP and includes them in restic.
+# Set this to the hostname of the designated Pi.
+openwrt_backup_receiver_host: "pi1" # REPLACE if desired
+openwrt_backup_receive_dir: "/var/backups/openwrt"
diff --git a/ansible/inventory/group_vars/linux_servers.yml b/ansible/inventory/group_vars/linux_servers.yml
new file mode 100644
index 0000000..5becb44
--- /dev/null
+++ b/ansible/inventory/group_vars/linux_servers.yml
@@ -0,0 +1,21 @@
+---
+# ── SSH auth ──────────────────────────────────────────────────────────────────
+ansible_user: "{{ vault_ansible_user }}"
+ansible_password: "{{ vault_ansible_password }}"
+ansible_become: true
+ansible_become_method: sudo
+ansible_become_password: "{{ vault_ansible_become_password }}"
+
+# ── Backup paths ──────────────────────────────────────────────────────────────
+backup_paths:
+  - /home
+  - /etc
+  - /opt
+
+backup_excludes:
+  - /home/*/.cache
+  - "*.tmp"
+
+# ── Features — adjust per host in host_vars if needed ─────────────────────────
+docker_backup_enabled: false
+image_backup_enabled: false
diff --git a/ansible/inventory/group_vars/pis.yml b/ansible/inventory/group_vars/pis.yml
new file mode 100644
index 0000000..c53c992
--- /dev/null
+++ b/ansible/inventory/group_vars/pis.yml
@@ -0,0 +1,23 @@
+---
+# ── SSH auth (sshpass) ────────────────────────────────────────────────────────
+ansible_user: "{{ vault_ansible_user }}"
+ansible_password: "{{ vault_ansible_password }}"
+ansible_become: true
+ansible_become_method: sudo
+ansible_become_password: "{{ vault_ansible_become_password }}"
+
+# ── Backup paths ──────────────────────────────────────────────────────────────
+backup_paths:
+  - /home
+  - /etc
+  - /opt
+  - /var/lib/docker/volumes
+
+backup_excludes:
+  - /home/*/.cache
+  - /home/*/.local/share/Trash
+  - "*.tmp"
+
+# ── Features ──────────────────────────────────────────────────────────────────
+docker_backup_enabled: true
+image_backup_enabled: true
diff --git a/ansible/inventory/hosts.yml b/ansible/inventory/hosts.yml
new file mode 100644
index
0000000..e34b380
--- /dev/null
+++ b/ansible/inventory/hosts.yml
@@ -0,0 +1,41 @@
+---
+# Fill in ansible_host values before running any playbook.
+# Run: ansible-playbook playbooks/deploy.yml --ask-vault-pass
+all:
+  vars:
+    ansible_python_interpreter: /usr/bin/python3
+
+  children:
+
+    pis:
+      hosts:
+        pi1:
+          ansible_host: 192.168.x.x # REPLACE
+        pi2:
+          ansible_host: 192.168.x.x # REPLACE
+        pi3:
+          ansible_host: 192.168.x.x # REPLACE
+        pi4:
+          ansible_host: 192.168.x.x # REPLACE
+        pi5:
+          ansible_host: 192.168.x.x # REPLACE
+        pi6:
+          ansible_host: 192.168.x.x # REPLACE
+        pi7:
+          ansible_host: 192.168.x.x # REPLACE
+
+    linux_servers:
+      hosts: {} # Add other Linux devices here, same structure as pis
+
+    # OpenWrt is managed differently: only the openwrt-backup.sh script is deployed.
+    # Ansible connects as root (OpenWrt default). NOTE(review): the python3 interpreter set under `all` applies here too — confirm it exists on the router.
+    openwrt:
+      hosts:
+        router:
+          ansible_host: 192.168.x.x # REPLACE
+          ansible_user: root
+
+    synology:
+      hosts:
+        nas:
+          ansible_host: 192.168.x.x # REPLACE — used for NFS and REST server
diff --git a/ansible/playbooks/check.yml b/ansible/playbooks/check.yml
new file mode 100644
index 0000000..1521817
--- /dev/null
+++ b/ansible/playbooks/check.yml
@@ -0,0 +1,49 @@
+---
+# Checks backup health: most recent snapshot and timer status per host.
+# No `restic check` is run, so repository integrity is NOT verified here.
+# Usage: ansible-playbook playbooks/check.yml --ask-vault-pass
+
+- name: Check backup health
+  hosts: pis:linux_servers
+  become: true
+  vars_files:
+    - ../vault.yml
+  tasks:
+    - name: Get last snapshot
+      # POSIX `.` instead of `source`: the shell module runs /bin/sh unless told otherwise.
+      # `set -a` exports what the env file assigns, so restic sees the repository and password.
+      ansible.builtin.shell: |
+        set -a
+        . /etc/restic/restic.env
+        set +a
+        restic snapshots --host "$(hostname)" --latest 1 --json
+      register: snapshots
+      changed_when: false
+      failed_when: false
+
+    - name: Show last snapshot
+      # Skipped when restic failed: stdout is then not JSON and from_json would abort the play.
+      when: snapshots.rc == 0
+      vars:
+        snapshot_list: "{{ snapshots.stdout | from_json }}"
+        newest: "{{ snapshot_list | sort(attribute='time') | last }}"
+      ansible.builtin.debug:
+        msg: "{{ {'time': newest.time, 'paths': newest.paths} if snapshot_list else 'No snapshots found' }}"
+
+    - name: Check timer status
+      ansible.builtin.command: systemctl is-active restic-backup.timer
+      register: timer_active
+      changed_when: false
+      failed_when: false
+
+    - name: Show timer status
+      ansible.builtin.debug:
+        msg: "restic-backup.timer on {{ inventory_hostname }}: {{ timer_active.stdout }}"
+
+    - name: Report hosts with no snapshots or inactive timers
+      ansible.builtin.fail:
+        msg: "PROBLEM on {{ inventory_hostname }}: timer={{ timer_active.stdout }}, snapshots={{ snapshots.stdout[:80] }}"
+      when: >
+        timer_active.stdout != 'active' or
+        snapshots.rc != 0 or
+        (snapshots.stdout | trim) in ['', '[]', 'null']
diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml
new file mode 100644
index 0000000..7815b68
--- /dev/null
+++ b/ansible/playbooks/deploy.yml
@@ -0,0 +1,34 @@
+---
+# Deploys the full backup stack to all Pis and Linux servers.
+# Usage: ansible-playbook playbooks/deploy.yml --ask-vault-pass
+#        ansible-playbook playbooks/deploy.yml --limit pi1 --ask-vault-pass
+
+- name: Deploy backup stack to Pis and Linux servers
+  hosts: pis:linux_servers
+  become: true
+  vars_files:
+    - ../vault.yml
+  roles:
+    - restic-client
+    - role: docker-backup
+      when: docker_backup_enabled | default(false)
+    - role: image-backup
+      when: image_backup_enabled | default(false)
+
+- name: Deploy OpenWrt backup script to router
+  hosts: openwrt
+  gather_facts: false
+  tasks:
+    - name: Copy OpenWrt backup script
+      ansible.builtin.copy:
+        src: ../../scripts/openwrt-backup.sh
+        dest: /usr/local/bin/openwrt-backup.sh
+        mode: "0755"
+
+    - name: Schedule OpenWrt backup via cron (daily at 02:30)
+      # OpenWrt uses BusyBox crond. NOTE(review): copy and cron presumably need Python on the router and an existing /usr/local/bin — confirm both.
+      ansible.builtin.cron:
+        name: "restic config backup"
+        minute: "30"
+        hour: "2"
+        job: "/usr/local/bin/openwrt-backup.sh >> /tmp/openwrt-backup.log 2>&1"
diff --git a/ansible/playbooks/run-backup.yml b/ansible/playbooks/run-backup.yml
new file mode 100644
index 0000000..c4be0df
--- /dev/null
+++ b/ansible/playbooks/run-backup.yml
@@ -0,0 +1,24 @@
+---
+# Triggers an immediate backup on target hosts.
+# Usage: ansible-playbook playbooks/run-backup.yml --ask-vault-pass
+#        ansible-playbook playbooks/run-backup.yml --limit pi1 --ask-vault-pass
+# Target defaults to all Pis and Linux servers; override with -e "target=pi3"
+
+- name: Run backup now
+  hosts: "{{ target | default('pis:linux_servers') }}"
+  become: true
+  vars_files:
+    - ../vault.yml
+  tasks:
+    - name: Start restic-backup.service
+      ansible.builtin.systemd:
+        name: restic-backup.service
+        state: started
+      # Async with polling: waits up to an hour for the oneshot unit; a failed backup fails this task.
+      async: 3600
+      poll: 10
+      register: backup_job
+
+    - name: Show result
+      ansible.builtin.debug:
+        msg: "Backup finished on {{ inventory_hostname }}: {{ 'FAILED' if backup_job is failed else 'OK' }}"
diff --git a/ansible/roles/docker-backup/defaults/main.yml b/ansible/roles/docker-backup/defaults/main.yml
new file mode 100644
index 0000000..98f243e
--- /dev/null
+++ b/ansible/roles/docker-backup/defaults/main.yml
@@ -0,0 +1,2 @@
+---
+restic_log_dir: /var/log/restic
diff --git a/ansible/roles/docker-backup/tasks/main.yml b/ansible/roles/docker-backup/tasks/main.yml
new file mode 100644
index 0000000..136b3c6
--- /dev/null
+++ b/ansible/roles/docker-backup/tasks/main.yml
@@ -0,0 +1,16 @@
+---
+- name: Install docker-pre-backup script
+  ansible.builtin.template:
+    src: docker-pre-backup.sh.j2
+    dest: /usr/local/bin/docker-pre-backup.sh
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Install docker-post-backup script
+  ansible.builtin.template:
+    src: docker-post-backup.sh.j2
+    dest: /usr/local/bin/docker-post-backup.sh
+    mode: "0755"
+    owner: root
+    group: root
diff --git a/ansible/roles/docker-backup/templates/docker-post-backup.sh.j2 b/ansible/roles/docker-backup/templates/docker-post-backup.sh.j2
new file mode 100644
index 0000000..908315b
--- /dev/null
+++ b/ansible/roles/docker-backup/templates/docker-post-backup.sh.j2
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Managed by Ansible — do not edit manually
+# Restarts containers that were stopped by docker-pre-backup.sh.
+set -euo pipefail
+
+LOG="{{ restic_log_dir }}/backup.log"
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] [docker-post] $*" | tee -a "$LOG"; }
+
+STATE_FILE=/run/restic-stopped-containers
+
+if [ ! -f "$STATE_FILE" ]; then
+  log "No state file — nothing to restart"
+  exit 0
+fi
+
+mapfile -t CONTAINERS < "$STATE_FILE"   # one container ID per line
+if [ "${#CONTAINERS[@]}" -eq 0 ] || [ -z "${CONTAINERS[0]}" ]; then
+  rm -f "$STATE_FILE"
+  log "No containers to restart"
+  exit 0
+fi
+
+log "Restarting containers"
+docker start "${CONTAINERS[@]}"
+rm -f "$STATE_FILE"   # only after a successful start, so a failed run can be retried
+log "Containers restarted"
diff --git a/ansible/roles/docker-backup/templates/docker-pre-backup.sh.j2 b/ansible/roles/docker-backup/templates/docker-pre-backup.sh.j2
new file mode 100644
index 0000000..e85578a
--- /dev/null
+++ b/ansible/roles/docker-backup/templates/docker-pre-backup.sh.j2
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Managed by Ansible — do not edit manually
+# Stops all running Docker containers before backup for data consistency.
+# Container IDs are written to /run/restic-stopped-containers for the post hook.
+set -euo pipefail
+
+LOG="{{ restic_log_dir }}/backup.log"
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] [docker-pre] $*" | tee -a "$LOG"; }
+STATE_FILE=/run/restic-stopped-containers
+
+# `|| true`: a host without a working Docker daemon simply has nothing to stop.
+mapfile -t RUNNING < <(docker ps -q 2>/dev/null || true)
+
+if [ "${#RUNNING[@]}" -eq 0 ]; then
+  log "No running containers — nothing to stop"
+  exit 0
+fi
+
+NAMES=$(docker ps --format '{% raw %}{{.Names}}{% endraw %}' | tr '\n' ' ')
+log "Stopping containers: $NAMES"
+
+# Record the IDs BEFORE stopping: if `docker stop` fails part-way, the post
+# hook still knows which containers have to be brought back up.
+printf '%s\n' "${RUNNING[@]}" > "$STATE_FILE"
+docker stop "${RUNNING[@]}"
+
+log "All containers stopped"
diff --git a/ansible/roles/image-backup/defaults/main.yml b/ansible/roles/image-backup/defaults/main.yml
new file mode 100644
index 0000000..bee16e4
--- /dev/null
+++ b/ansible/roles/image-backup/defaults/main.yml
@@ -0,0 +1,5 @@
+---
+image_device: /dev/mmcblk0 # standard Pi SD card device
+nfs_mount_point: /mnt/synology-images
+image_keep_count: 3 # keep last N images per host on the NFS share
+restic_log_dir: /var/log/restic # image-backup.sh writes image-backup.log here
diff --git a/ansible/roles/image-backup/handlers/main.yml b/ansible/roles/image-backup/handlers/main.yml
new file mode 100644
index 0000000..2a57488
--- /dev/null
+++ b/ansible/roles/image-backup/handlers/main.yml
@@ -0,0 +1,10 @@
+---
+- name: Reload systemd
+  ansible.builtin.systemd:
+    daemon_reload: true
+
+- name: Enable image backup timer
+  ansible.builtin.systemd:
+    name: image-backup.timer
+    enabled: true
+    state: started
diff --git a/ansible/roles/image-backup/tasks/main.yml b/ansible/roles/image-backup/tasks/main.yml
new file mode 100644
index 0000000..8c0ac0c
--- /dev/null
+++ b/ansible/roles/image-backup/tasks/main.yml
@@ -0,0 +1,44 @@
+---
+- name: Install NFS client
+  ansible.builtin.package:
+    name: nfs-common
+    state: present
+
+- name: Create NFS mount point
+  ansible.builtin.file:
+    path: "{{ nfs_mount_point }}"
+    state: directory
+    mode: "0755"
+
+- name: Install image backup script
+  ansible.builtin.template:
+    src: image-backup.sh.j2
+    dest: /usr/local/bin/image-backup.sh
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Install systemd service
+  ansible.builtin.template:
+    src: image-backup.service.j2
+    dest: /etc/systemd/system/image-backup.service
+    mode: "0644"
+  notify: Reload systemd
+
+- name: Install systemd timer
+  ansible.builtin.template:
+    src: image-backup.timer.j2
+    dest: /etc/systemd/system/image-backup.timer
+    mode: "0644"
+  notify:
+    - Reload systemd
+    - Enable image backup timer
+
+# Handlers only fire when a unit file changes, so a deploy interrupted after
+# writing the timer would otherwise leave it disabled on every later run.
+- name: Ensure image backup timer is enabled and started
+  ansible.builtin.systemd:
+    name: image-backup.timer
+    enabled: true
+    state: started
+    daemon_reload: true
diff --git a/ansible/roles/image-backup/templates/image-backup.service.j2 b/ansible/roles/image-backup/templates/image-backup.service.j2
new file mode 100644
index 0000000..52d0170
--- /dev/null
+++ b/ansible/roles/image-backup/templates/image-backup.service.j2
@@ -0,0 +1,13 @@
+[Unit]
+Description=Monthly SD image backup — {{ ansible_hostname }}
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/image-backup.sh
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target
diff --git a/ansible/roles/image-backup/templates/image-backup.sh.j2
b/ansible/roles/image-backup/templates/image-backup.sh.j2
new file mode 100644
index 0000000..2e86821
--- /dev/null
+++ b/ansible/roles/image-backup/templates/image-backup.sh.j2
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Managed by Ansible — do not edit manually
+# Creates a compressed SD card image and stores it on the Synology NFS share.
+#
+# The image is streamed into a *.partial file and renamed only once dd and
+# gzip both succeeded; old images are pruned afterwards, so a failed run never
+# costs an existing good image. The share is unmounted again on every exit path.
+set -euo pipefail
+
+HOSTNAME="{{ ansible_hostname }}"
+DEVICE="{{ image_device }}"
+MOUNT="{{ nfs_mount_point }}"
+NFS_SHARE="{{ nfs_image_share }}"
+LOG="{{ restic_log_dir }}/image-backup.log"
+DATE=$(date +%F)
+KEEP={{ image_keep_count }}
+MOUNTED=false
+PARTIAL=""
+
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG"; }
+
+# EXIT trap: remove a half-written image and release the mount if we made it.
+cleanup() {
+  if [ -n "$PARTIAL" ]; then
+    rm -f -- "$PARTIAL"
+  fi
+  if [ "$MOUNTED" = true ]; then
+    umount "$MOUNT" || log "WARNING: could not unmount $MOUNT"
+  fi
+}
+trap cleanup EXIT
+
+log "=== Image backup started: $HOSTNAME ==="
+
+# ── Mount NFS share ───────────────────────────────────────────────────────────
+# A single read-write mount; the script only ever writes to the share.
+if ! mountpoint -q "$MOUNT"; then
+  log "Mounting $NFS_SHARE → $MOUNT"
+  mount -t nfs "$NFS_SHARE" "$MOUNT"
+  MOUNTED=true
+fi
+
+DEST="$MOUNT/$HOSTNAME"
+mkdir -p "$DEST"
+
+# ── Create image ──────────────────────────────────────────────────────────────
+OUTFILE="$DEST/${HOSTNAME}-${DATE}.img.gz"
+PARTIAL="$OUTFILE.partial"
+log "Writing image to $OUTFILE (source: $DEVICE)"
+
+# No status=progress: its continuous updates would flood the log file.
+dd if="$DEVICE" bs=4M 2>>"$LOG" | gzip > "$PARTIAL"
+mv -f -- "$PARTIAL" "$OUTFILE"
+PARTIAL=""
+
+SIZE=$(du -sh "$OUTFILE" | cut -f1)
+log "Image complete: $OUTFILE ($SIZE)"
+
+# ── Rotate old images ─────────────────────────────────────────────────────────
+# Glob results are sorted by name and the names embed an ISO date, so the
+# oldest images come first; drop everything beyond the newest $KEEP.
+shopt -s nullglob
+IMAGES=("$DEST"/*.img.gz)
+shopt -u nullglob
+EXCESS=$(( ${#IMAGES[@]} - KEEP ))
+if [ "$EXCESS" -gt 0 ]; then
+  log "Rotating: keeping last $KEEP images"
+  rm -f -- "${IMAGES[@]:0:EXCESS}"
+fi
+
+log "=== Image backup finished ==="
diff --git a/ansible/roles/image-backup/templates/image-backup.timer.j2
b/ansible/roles/image-backup/templates/image-backup.timer.j2
new file mode 100644
index 0000000..132898f
--- /dev/null
+++ b/ansible/roles/image-backup/templates/image-backup.timer.j2
@@ -0,0 +1,11 @@
+[Unit]
+Description=Monthly image backup — {{ ansible_hostname }}
+
+[Timer]
+OnCalendar={{ image_backup_oncalendar }}
+# Spread load across a 1-hour window so Pis don't all hammer NFS at once
+RandomizedDelaySec=3600
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/ansible/roles/restic-client/defaults/main.yml b/ansible/roles/restic-client/defaults/main.yml
new file mode 100644
index 0000000..4cd47d7
--- /dev/null
+++ b/ansible/roles/restic-client/defaults/main.yml
@@ -0,0 +1,16 @@
+---
+restic_version: "0.17.3"
+restic_binary: /usr/local/bin/restic
+restic_env_file: /etc/restic/restic.env
+restic_script: /usr/local/bin/restic-backup.sh
+restic_log_dir: /var/log/restic
+
+# Overridden by group_vars/pis.yml and host_vars
+backup_paths:
+  - /home
+  - /etc
+
+backup_excludes: []
+
+docker_backup_enabled: false
+image_backup_enabled: false
diff --git a/ansible/roles/restic-client/handlers/main.yml b/ansible/roles/restic-client/handlers/main.yml
new file mode 100644
index 0000000..bf332eb
--- /dev/null
+++ b/ansible/roles/restic-client/handlers/main.yml
@@ -0,0 +1,29 @@
+---
+- name: Verify restic install
+  ansible.builtin.command: "{{ restic_binary }} version"
+  changed_when: false
+
+- name: Init restic repo
+  ansible.builtin.command: "{{ restic_binary }} init"
+  environment:
+    RESTIC_REPOSITORY: "{{ restic_repo }}"
+    RESTIC_PASSWORD: "{{ restic_repo_password }}"
+  register: restic_init
+  # OK if already initialized. NOTE(review): restic appears to word this either
+  # as "already initialized" or as "config file already exists" depending on
+  # backend/version, so both are accepted — confirm against the pinned release.
+  failed_when: >
+    restic_init.rc != 0 and
+    'already initialized' not in restic_init.stderr and
+    'already exists' not in restic_init.stderr
+  changed_when: restic_init.rc == 0
+
+- name: Reload systemd
+  ansible.builtin.systemd:
+    daemon_reload: true
+
+- name: Enable restic timer
+  ansible.builtin.systemd:
+    name: restic-backup.timer
+    enabled: true
+    state: started
diff --git
a/ansible/roles/restic-client/tasks/main.yml b/ansible/roles/restic-client/tasks/main.yml
new file mode 100644
index 0000000..e65c04b
--- /dev/null
+++ b/ansible/roles/restic-client/tasks/main.yml
@@ -0,0 +1,98 @@
+---
+- name: Install dependencies
+  ansible.builtin.package:
+    name:
+      - curl
+      - bzip2
+    state: present
+
+- name: Get current restic version
+  ansible.builtin.command: "{{ restic_binary }} version"
+  register: restic_installed
+  ignore_errors: true
+  changed_when: false
+
+- name: Install restic binary
+  # bash (not /bin/sh) so `pipefail` is available and a failed download cannot
+  # be masked by the pipe; mktemp avoids a predictable file name in /tmp.
+  ansible.builtin.shell: |
+    set -euo pipefail
+    ARCH=$(dpkg --print-architecture 2>/dev/null || uname -m)
+    case "$ARCH" in
+      armhf|armv6l|armv7l) ARCH="arm" ;;
+      arm64|aarch64) ARCH="arm64" ;;
+      amd64|x86_64) ARCH="amd64" ;;
+    esac
+    TMP=$(mktemp)
+    trap 'rm -f "$TMP"' EXIT
+    curl -fsSL \
+      "https://github.com/restic/restic/releases/download/v{{ restic_version }}/restic_{{ restic_version }}_linux_${ARCH}.bz2" \
+      | bunzip2 > "$TMP"
+    install -m 755 "$TMP" {{ restic_binary }}
+  args:
+    executable: /bin/bash
+  when: >
+    restic_installed.rc != 0 or
+    restic_version not in (restic_installed.stdout | default(''))
+  notify: Verify restic install
+
+- name: Create config directory
+  ansible.builtin.file:
+    path: /etc/restic
+    state: directory
+    mode: "0700"
+    owner: root
+    group: root
+
+- name: Create log directory
+  ansible.builtin.file:
+    path: "{{ restic_log_dir }}"
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Write environment file
+  ansible.builtin.template:
+    src: restic.env.j2
+    dest: "{{ restic_env_file }}"
+    mode: "0600"
+    owner: root
+    group: root
+  notify: Init restic repo
+
+- name: Write backup script
+  ansible.builtin.template:
+    src: restic-backup.sh.j2
+    dest: "{{ restic_script }}"
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Create OpenWrt backup receive directory
+  # NOTE(review): the README has the router connect as `pi@…`; a root-owned
+  # 0700 directory is not writable by that account. Set the two optional vars
+  # below to the upload user — confirm against scripts/openwrt-backup.sh.
+  ansible.builtin.file:
+    path: "{{ openwrt_backup_receive_dir }}"
+    state: directory
+    mode: "0700"
+    owner: "{{ openwrt_backup_receive_owner | default('root') }}"
+    group: "{{ openwrt_backup_receive_group | default('root') }}"
+  when: inventory_hostname == openwrt_backup_receiver_host
+
+- name: Install systemd service
+  ansible.builtin.template:
+    src: restic-backup.service.j2
+    dest: /etc/systemd/system/restic-backup.service
+    mode: "0644"
+  notify: Reload systemd
+
+- name: Install systemd timer
+  ansible.builtin.template:
+    src: restic-backup.timer.j2
+    dest: /etc/systemd/system/restic-backup.timer
+    mode: "0644"
+  notify:
+    - Reload systemd
+    - Enable restic timer
diff --git a/ansible/roles/restic-client/templates/restic-backup.service.j2 b/ansible/roles/restic-client/templates/restic-backup.service.j2
new file mode 100644
index 0000000..53581b0
--- /dev/null
+++ b/ansible/roles/restic-client/templates/restic-backup.service.j2
@@ -0,0 +1,15 @@
+[Unit]
+Description=Restic backup — {{ ansible_hostname }}
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart={{ restic_script }}
+StandardOutput=journal
+StandardError=journal
+# Prevent OOM killer from targeting this
+OOMScoreAdjust=-100
+
+[Install]
+WantedBy=multi-user.target
diff --git a/ansible/roles/restic-client/templates/restic-backup.sh.j2 b/ansible/roles/restic-client/templates/restic-backup.sh.j2
new file mode 100644
index 0000000..8983310
--- /dev/null
+++ b/ansible/roles/restic-client/templates/restic-backup.sh.j2
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Managed by Ansible — do not edit manually
+#
+# Daily restic backup: pre-hook → restic backup → post-hook → forget/prune.
+# Exits with restic's backup exit code.
+set -euo pipefail
+
+# `set -a` exports every variable the env file assigns, so the restic child
+# process sees them even if the file contains plain VAR=value lines.
+set -a
+source {{ restic_env_file }}
+set +a
+
+HOSTNAME="{{ ansible_hostname }}"
+LOG="{{ restic_log_dir }}/backup.log"
+POST_HOOK_DONE=false
+
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG"; }
+
+# ── Post-backup hook (restarts Docker containers) ─────────────────────────────
+# Also installed as EXIT trap: under `set -e` any failing step would otherwise
+# end the script with the containers still stopped. Runs at most once.
+run_post_hook() {
+  if [ "$POST_HOOK_DONE" = true ]; then
+    return 0
+  fi
+  POST_HOOK_DONE=true
+  if [ -x /usr/local/bin/docker-post-backup.sh ]; then
+    log "Running post-backup hook"
+    /usr/local/bin/docker-post-backup.sh
+  fi
+}
+trap run_post_hook EXIT
+
+log "=== Backup started: $HOSTNAME ==="
+
+# ── Pre-backup hook (stops Docker containers if enabled) ─────────────────────
+if [ -x /usr/local/bin/docker-pre-backup.sh ]; then
+  log "Running pre-backup hook"
+  /usr/local/bin/docker-pre-backup.sh
+fi
+
+# ── Build exclude arguments ───────────────────────────────────────────────────
+EXCLUDES=(
+{% for excl in backup_excludes %}
+  "--exclude={{ excl }}"
+{% endfor %}
+  "--exclude-caches"
+)
+
+# ── Build path arguments ──────────────────────────────────────────────────────
+# On the designated receiver the OpenWrt receive directory is added, so the
+# router config actually ends up in that host's snapshots.
+PATHS=(
+{% for path in backup_paths %}
+  "{{ path }}"
+{% endfor %}
+{% if openwrt_backup_receive_dir is defined and inventory_hostname == (openwrt_backup_receiver_host | default('')) and openwrt_backup_receive_dir not in backup_paths %}
+  "{{ openwrt_backup_receive_dir }}"
+{% endif %}
+)
+
+# ── Run backup ────────────────────────────────────────────────────────────────
+# `|| BACKUP_RC=$?` keeps `set -e` from aborting here, so the post hook and the
+# failure branch below are reached when restic fails.
+BACKUP_RC=0
+{{ restic_binary }} backup \
+  --verbose \
+  --one-file-system \
+  --host "$HOSTNAME" \
+  "${EXCLUDES[@]}" \
+  "${PATHS[@]}" || BACKUP_RC=$?
+
+# Restart containers now rather than after the (potentially slow) prune.
+run_post_hook
+
+# ── Prune ─────────────────────────────────────────────────────────────────────
+if [ "$BACKUP_RC" -eq 0 ]; then
+  log "Pruning snapshots for $HOSTNAME"
+  {{ restic_binary }} forget \
+    --host "$HOSTNAME" \
+    --keep-daily {{ restic_keep_daily }} \
+    --keep-weekly {{ restic_keep_weekly }} \
+    --keep-monthly {{ restic_keep_monthly }} \
+    --prune
+else
+  log "Backup FAILED (rc=$BACKUP_RC) — skipping prune"
+fi
+
+log "=== Backup finished (rc=$BACKUP_RC) ==="
+exit $BACKUP_RC
diff --git a/ansible/roles/restic-client/templates/restic-backup.timer.j2 b/ansible/roles/restic-client/templates/restic-backup.timer.j2
new file mode 100644
index 0000000..2b6af90
--- /dev/null
+++ b/ansible/roles/restic-client/templates/restic-backup.timer.j2
@@ -0,0 +1,12 @@
+[Unit]
+Description=Daily restic backup — {{ ansible_hostname }}
+
+[Timer]
+OnCalendar={{ backup_oncalendar }}
+# Spread load: each device starts within a random 5-minute window
+RandomizedDelaySec=300
+# Run on next boot if missed (e.g.
Pi was off at 02:00)
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/ansible/roles/restic-client/templates/restic.env.j2 b/ansible/roles/restic-client/templates/restic.env.j2
new file mode 100644
index 0000000..81eb67a
--- /dev/null
+++ b/ansible/roles/restic-client/templates/restic.env.j2
@@ -0,0 +1,5 @@
+# Managed by Ansible — meant to be sourced by a shell (see docs/restore.md).
+# Variables are exported so restic, a child process, inherits them; values are
+# shell-quoted so special characters in the password survive sourcing.
+export RESTIC_REPOSITORY={{ restic_repo | quote }}
+export RESTIC_PASSWORD={{ restic_repo_password | quote }}
diff --git a/ansible/vault.yml b/ansible/vault.yml
new file mode 100644
index 0000000..7f44d01
--- /dev/null
+++ b/ansible/vault.yml
@@ -0,0 +1,18 @@
+---
+# IMPORTANT: encrypt this file before committing:
+#   ansible-vault encrypt ansible/vault.yml
+#
+# Edit after encryption:
+#   ansible-vault edit ansible/vault.yml
+
+# SSH credentials for Pis and Linux servers
+vault_ansible_user: "pi"  # default SSH user; override in host_vars if different
+vault_ansible_password: "CHANGEME"
+vault_ansible_become_password: "CHANGEME"
+
+# Restic REST server HTTP auth
+vault_rest_server_user: "restic"
+vault_rest_server_password: "CHANGEME"
+
+# Restic repo encryption — CRITICAL: losing this means losing access to all backups
+vault_restic_repo_password: "CHANGEME"
diff --git a/docs/restore.md b/docs/restore.md
new file mode 100644
index 0000000..dabc7d5
--- /dev/null
+++ b/docs/restore.md
@@ -0,0 +1,137 @@
+# Restore Runbooks
+
+## Scenario A — Full disaster recovery (SD card dead)
+
+### 1. Flash a new SD card from the latest image
+
+```bash
+# On your control node — find the latest image on Synology
+ls /mnt/synology-images/<hostname>/
+
+# Flash to new SD card (replace sdX with your card device)
+gunzip -c /mnt/synology-images/pi1/pi1-2025-03-01.img.gz | sudo dd of=/dev/sdX bs=4M status=progress
+sync
+```
+
+Boot the Pi. It will come up with the OS and data as of the image date.
+
+### 2.
Restore data changed since the image (restic) + +After the Pi is booted and on the network: + +```bash +# On the Pi — restore latest snapshot to root +sudo restic restore latest --host pi1 --target / + +# Or restore only specific paths +sudo restic restore latest --host pi1 --include /home --target / +sudo restic restore latest --host pi1 --include /etc --target / +sudo restic restore latest --host pi1 --include /var/lib/docker/volumes --target / +``` + +### 3. Restart Docker + +```bash +sudo docker compose up -d # in each compose project directory +# or +sudo systemctl start docker +``` + +--- + +## Scenario B — Accidental file deletion (restic only) + +```bash +# List snapshots for this host +restic snapshots --host $(hostname) + +# Browse a specific snapshot +restic ls <snapshot-id> /home/pi/ + +# Restore a single file or directory +restic restore <snapshot-id> --include /home/pi/important-file --target /tmp/restore + +# Restore to original location +restic restore <snapshot-id> --include /home/pi/important-file --target / +``` + +--- + +## Scenario C — Restore OpenWrt config + +The OpenWrt backup is a `.tar.gz` created by `sysupgrade`. It lives on the designated Pi at `/var/backups/openwrt/` and is included in that Pi's restic backup. + +### From restic (Pi has been restored) + +```bash +# On the receiver Pi, find the backup +ls /var/backups/openwrt/ + +# Copy to your workstation +scp pi@RECEIVER_HOST:/var/backups/openwrt/openwrt-2025-03-01.tar.gz .
+``` + +### Restore on the router + +```bash +# Copy backup to router +scp openwrt-2025-03-01.tar.gz root@ROUTER_IP:/tmp/ + +# Restore via sysupgrade (restores the saved settings) +ssh root@ROUTER_IP "sysupgrade -r /tmp/openwrt-2025-03-01.tar.gz" +``` + +--- + +## Scenario D — Restore a Docker volume + +```bash +# Restore volume to a temp directory +restic restore latest --host pi1 \ + --include /var/lib/docker/volumes/MY_VOLUME \ + --target /tmp/restore + +# Stop the container +docker compose stop SERVICE + +# Replace volume data +sudo rsync -a /tmp/restore/var/lib/docker/volumes/MY_VOLUME/_data/ \ + /var/lib/docker/volumes/MY_VOLUME/_data/ + +# Start container +docker compose start SERVICE +``` + +--- + +## Useful restic commands + +```bash +# Load and export the env so restic can see it (run as root on the Pi) +set -a; source /etc/restic/restic.env; set +a + +# List all snapshots for this host +restic snapshots --host $(hostname) + +# Show snapshot content +restic ls <snapshot-id> + +# Verify repo integrity +restic check + +# Show repo stats +restic stats --host $(hostname) + +# Mount repo as filesystem (requires FUSE) +restic mount /mnt/restic-browse +``` + +--- + +## Recovery time estimates + +| Scenario | Approximate time | +|---|---| +| Flash SD from image (32 GB card) | ~15 min | +| Restic restore (data-only, typical Pi) | 5–30 min depending on changed data | +| OpenWrt config restore | < 5 min | diff --git a/docs/synology-setup.md b/docs/synology-setup.md new file mode 100644 index 0000000..0898a02 --- /dev/null +++ b/docs/synology-setup.md @@ -0,0 +1,92 @@ +# Synology Setup Guide + +## Prerequisites + +- DSM 7.2 or later (older DSM releases work, but there the Container Manager package is called Docker) +- Admin access + +## Step 1 — Enable SSH + +DSM → Control Panel → Terminal & SNMP → Terminal tab → enable SSH service. + +## Step 2 — Install Container Manager + +DSM → Package Center → search "Container Manager" → install.
+ +## Step 3 — Run the setup script + +Fill in your values at the top of `scripts/synology-setup.sh`, then run: + +```bash +bash scripts/synology-setup.sh +``` + +This will: +- Create `/volume1/backups/restic` (REST server data) +- Create `/volume1/images` (monthly SD card images) +- Write an htpasswd file for REST server auth +- Start the `restic/rest-server` Docker container on port 8000 + +## Step 4 — Configure NFS for image backups + +DSM → Control Panel → File Services → NFS: + +1. Enable NFS service (NFSv4 recommended) +2. Go to **Shared Folder** → select (or create) the `images` folder → Edit → NFS permissions +3. Add a rule: + - Hostname/IP: your Pi subnet (e.g. `192.168.1.0/24`) + - Privilege: Read/Write + - Squash: No mapping + - Security: sys + +## Step 5 — Verify + +```bash +# Test REST server (from any host on your network) +curl http://restic:YOUR_PASSWORD@SYNOLOGY_IP:8000/ + +# Test NFS mount (from a Pi) +sudo mount -t nfs SYNOLOGY_IP:/volume1/images /mnt/test +ls /mnt/test +sudo umount /mnt/test +``` + +## REST Server Management + +```bash +# View logs +ssh admin@synology "docker logs restic-rest-server" + +# Restart +ssh admin@synology "docker restart restic-rest-server" + +# Check running +ssh admin@synology "docker ps | grep restic" +``` + +## Storage Layout + +``` +/volume1/ + backups/ + restic/ ← shared restic repo (all devices, dedup'd) + .htpasswd ← REST server auth + config ← restic repo metadata + data/ ← deduplicated backup data + index/ + keys/ + locks/ + snapshots/ + images/ + pi1/ + pi1-2025-01-01.img.gz + pi1-2025-02-01.img.gz + pi1-2025-03-01.img.gz + pi2/ + ... +``` + +## Firewall (if enabled on Synology) + +Allow inbound TCP on port 8000 from your Pi VLANs. +NFS uses TCP/UDP 2049 — allow from Pi subnets. 
diff --git a/scripts/openwrt-backup.sh b/scripts/openwrt-backup.sh
new file mode 100644
index 0000000..bf21e44
--- /dev/null
+++ b/scripts/openwrt-backup.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+# OpenWrt config backup — ash/busybox compatible
+# Runs on the router, pushes backup to a designated Pi via SCP.
+# The Pi then includes it in its restic backup automatically.
+#
+# One-time setup:
+#   1. On the router: ssh-keygen -t ed25519 -f /etc/dropbear/backup_id
+#   2. Add the public key to ~/.ssh/authorized_keys on RECEIVER_HOST
+#   3. Test: ssh -i /etc/dropbear/backup_id pi@RECEIVER_HOST "echo ok"
+#
+# NOTE(review): StrictHostKeyChecking=no skips host-key verification, so the
+# config archive is sent to whichever host answers on RECEIVER_HOST — consider
+# pinning the Pi's host key instead.
+set -e
+
+RECEIVER_HOST="192.168.x.x" # REPLACE — IP of the designated Pi
+RECEIVER_USER="pi" # REPLACE — SSH user on the Pi
+RECEIVER_DIR="/var/backups/openwrt"
+SSH_KEY="/etc/dropbear/backup_id"
+
+BACKUP_FILE="/tmp/openwrt-$(date +%F).tar.gz"
+
+# Refuse to run with the unedited placeholder instead of failing later in scp.
+if [ "$RECEIVER_HOST" = "192.168.x.x" ]; then
+  echo "openwrt-backup: RECEIVER_HOST is still the placeholder; edit this script" >&2
+  exit 1
+fi
+
+# Remove the local archive on every exit path, so a failed upload does not
+# leave it behind in /tmp.
+cleanup() { rm -f "$BACKUP_FILE"; }
+trap cleanup EXIT
+trap 'exit 1' HUP INT TERM
+
+# Create config archive (includes /etc/config, /etc/openwrt_release, etc.)
+sysupgrade --create-backup "$BACKUP_FILE"
+
+# Push to receiver Pi
+scp -i "$SSH_KEY" -o StrictHostKeyChecking=no \
+  "$BACKUP_FILE" \
+  "${RECEIVER_USER}@${RECEIVER_HOST}:${RECEIVER_DIR}/"
+
+# Keep only last 14 backups on the Pi
+ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no \
+  "${RECEIVER_USER}@${RECEIVER_HOST}" \
+  "ls -t ${RECEIVER_DIR}/openwrt-*.tar.gz 2>/dev/null | tail -n +15 | xargs -r rm -f"
+
+logger -t openwrt-backup "Config backup pushed to ${RECEIVER_HOST}:${RECEIVER_DIR}"
diff --git a/scripts/synology-setup.sh b/scripts/synology-setup.sh
new file mode 100644
index 0000000..084f0ba
--- /dev/null
+++ b/scripts/synology-setup.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# One-time Synology setup: starts the restic REST server and creates NFS shares.
+# Run this on your control node (not on the Synology directly).
+# Prerequisites:
+# - SSH enabled on Synology (Control Panel → Terminal & SNMP)
+# - Container Manager (Docker) installed
+# - admin SSH access to Synology
+set -euo pipefail
+
+SYNOLOGY_HOST="192.168.x.x" # REPLACE
+SYNOLOGY_USER="admin" # REPLACE
+
+REST_PORT=8000
+REST_DATA_DIR="/volume1/backups/restic"
+IMAGE_DIR="/volume1/images"
+REST_SERVER_USER="restic" # REPLACE with your vault_rest_server_user
+REST_SERVER_PASS="CHANGEME" # REPLACE with your vault_rest_server_password
+
+echo "=== Synology Backup Setup ==="
+echo "Target: ${SYNOLOGY_USER}@${SYNOLOGY_HOST}"
+echo ""
+
+# The heredoc below runs on the Synology; it is unquoted, so the settings above expand locally.
+# NOTE(review): steps 1-3 mirror the list in docs/synology-setup.md; htpasswd is taken from the image — verify.
+# NOTE(review): --append-only rejects deletes, but restic-backup.sh runs forget --prune — confirm.
+ssh "${SYNOLOGY_USER}@${SYNOLOGY_HOST}" bash <<EOF
+set -e
+
+echo "[1/4] Creating directories"
+mkdir -p "${REST_DATA_DIR}" "${IMAGE_DIR}"
+
+echo "[2/4] Writing htpasswd file"
+docker run --rm \
+  -v "${REST_DATA_DIR}:/data" \
+  --entrypoint htpasswd \
+  restic/rest-server:latest \
+  -B -b -c /data/.htpasswd "${REST_SERVER_USER}" "${REST_SERVER_PASS}"
+
+echo "[3/4] Starting restic REST server"
+docker rm -f restic-rest-server 2>/dev/null || true
+docker run -d \
+  --name restic-rest-server \
+  --restart always \
+  -p ${REST_PORT}:8000 \
+  -v "${REST_DATA_DIR}:/data" \
+  restic/rest-server:latest \
+  --append-only \
+  --htpasswd-file /data/.htpasswd \
+  --no-auth=false
+
+echo "[4/4] Done."
+echo ""
+echo "REST server: http://${SYNOLOGY_HOST}:${REST_PORT}/"
+echo "Image share: ${SYNOLOGY_HOST}:${IMAGE_DIR} (configure NFS in DSM → File Services → NFS)"
+echo ""
+echo "Next steps:"
+echo " - In DSM: File Services → NFS → enable NFS service"
+echo " - In DSM: create NFS share for ${IMAGE_DIR}, allow your Pi subnet (read/write)"
+echo " - Test REST server: curl http://${REST_SERVER_USER}:${REST_SERVER_PASS}@${SYNOLOGY_HOST}:${REST_PORT}/"
+EOF