core-deployments/velero/restore.sh

190 lines
6.8 KiB
Bash
Executable file

#!/usr/bin/env bash
# written by Aaron 2023-08-27
#######################################
# Abort the script unless the given program can be found by the shell.
# Arguments: $1 - name of the command to look up
# Outputs:   writes an error message to stderr when the command is missing
# Returns:   0 when the command exists; otherwise exits the shell with 1
#######################################
command_exists() {
  # Test the lookup directly instead of inspecting $? afterwards.
  if ! command -v "$1" >/dev/null 2>&1; then
    # Diagnostics go to stderr so they never mix with captured stdout.
    echo "Software dependency not met: $1" >&2
    exit 1
  fi
}
# Make sure the required tooling is present before touching the cluster.
command_exists "velero"
command_exists "kubectl"

# Show the available backups so the operator can pick one.
velero backup get || exit 1
echo -n "Please specify backup to be restored: "
# -r keeps backslashes in the answer literal.
read -r BACKUPNAME
# An empty answer (or a closed stdin) cannot identify a backup; stop here
# instead of handing velero a missing --from-backup value.
if [[ -z "$BACKUPNAME" ]]; then
  echo "No backup name given, aborting." >&2
  exit 1
fi

# Phase 1: CRDs and services. The name is quoted so that whitespace or glob
# characters in the answer cannot change the velero command line.
echo "restore CRDs and services (services are just restored to create the namespaces)"
velero restore create --from-backup "$BACKUPNAME" --include-resources customresourcedefinitions,services --include-cluster-resources=true restore-part-1 || exit 1

# Phase 2: the nginx-ingress and longhorn-system namespaces.
echo "restore Longhorn and ingress controller"
velero restore create --from-backup "$BACKUPNAME" --include-namespaces nginx-ingress,longhorn-system --include-cluster-resources=true restore-part-2 || exit 1
echo ""
######
# Block until Longhorn has created its instance-manager pods
######
namespace="longhorn-system"
pod_name_pattern="instance-manager"
wait_timeout=300 # upper bound, in seconds, for the pods to show up
wait_interval=5  # pause, in seconds, between two polls

echo "Waiting for $pod_name_pattern pods to be created in namespace $namespace..."

# Poll the namespace until the pod listing matches the pattern or the time
# budget is used up.
elapsed_time=0
until (( elapsed_time >= wait_timeout )); do
  pod_names=$(kubectl get pods -n "$namespace" --output=jsonpath='{.items[*].metadata.name}' | grep -E "$pod_name_pattern")
  [[ -n "$pod_names" ]] && break
  sleep $wait_interval
  elapsed_time=$((elapsed_time + wait_interval))
done

# Nothing matched before the loop ended: give up.
[[ -n "$pod_names" ]] || {
  echo "No pods matching the pattern were created within the specified timeout."
  exit 1
}

echo "$pod_name_pattern pods detected. Proceeding with deletion in 10s..."
sleep 10
######
# Delete Longhorn Instance Pods
######
namespace="longhorn-system"
pod_name_pattern="instance-manager"

# Get the list of pod names in the specified namespace
pod_names=$(kubectl get pods -n "$namespace" --output=jsonpath='{.items[*].metadata.name}')
if [[ -z "$pod_names" ]]; then
  # Stopping here skips the remaining restore steps, so report it as a
  # failure instead of exiting with a success status.
  echo "No pods found in namespace $namespace." >&2
  exit 1
fi

# Delete the matching pods one by one.
# $pod_names is left unquoted on purpose: the loop relies on word splitting
# to walk through the individual names.
for pod_name in $pod_names; do
  # Literal prefix match; no need to spawn grep for a single string.
  if [[ "$pod_name" == "$pod_name_pattern"* ]]; then
    # Only claim the deletion when kubectl actually succeeded.
    if kubectl delete pod "$pod_name" -n "$namespace"; then
      echo "Deleted pod: $pod_name"
    else
      echo "Failed to delete pod: $pod_name" >&2
    fi
  fi
done
#######################################
# Ask a yes/no question and repeat it until a usable answer is given.
# Arguments: $* - the question; " [y/n]: " is appended to form the prompt
# Returns:   0 for an answer starting with y/Y,
#            1 for an answer starting with n/N, or when stdin reaches
#            end-of-file without a "yes" answer
#######################################
yes_or_no() {
  local yn
  while true; do
    if ! read -r -p "$* [y/n]: " yn; then
      # End of input: without this check the loop would spin forever,
      # because read keeps failing and no case arm matches. A final answer
      # without trailing newline is still honored; anything else counts as
      # "no".
      [[ "$yn" == [Yy]* ]]
      return
    fi
    case "$yn" in
      [Yy]*) return 0 ;;
      [Nn]*) return 1 ;;
    esac
  done
}
if yes_or_no "Should the script restore all backups automatically?"; then
# Automatic mode talks to the Longhorn HTTP API and needs curl and jq.
command_exists "curl"
command_exists "jq"

# Ask for the Longhorn credentials until the API accepts them.
while :; do
  echo -n "longhorn username: "
  # Stop when stdin is closed; otherwise this loop would retry forever with
  # empty credentials.
  read -r USERNAME || [[ -n "$USERNAME" ]] || { echo "no input available, aborting" >&2; exit 1; }
  echo -n "longhorn password: "
  read -r -s PASSWORD || [[ -n "$PASSWORD" ]] || { echo "no input available, aborting" >&2; exit 1; }
  echo ""
  # check if credentials work
  API_STATUS_CODE=$(curl -o /dev/null -su "$USERNAME:$PASSWORD" -w "%{http_code}" -X "GET" "https://longhorn.services.yolokube.de/v1")
  if [[ $API_STATUS_CODE == "200" ]]; then
    break
  else
    echo "login credentials seem to be wrong, got error $API_STATUS_CODE"
  fi
done

# get Backup Target
BACKUPTARGET=$(curl -su "$USERNAME:$PASSWORD" https://longhorn.services.yolokube.de/v1/backuptargets | jq -r ".data[0].backupTargetURL")

# Request a restore of the latest backup for every backup volume.
while IFS= read -r b; do
  # The here-string delivers one empty line when the list is empty.
  [[ -n "$b" ]] || continue
  # Here-strings hand the JSON to jq unmodified; an unquoted echo would
  # word-split and glob-expand it first.
  LASTBACKUP=$(jq -r ".lastBackupName" <<< "$b")
  VOLUMEID=$(jq -r ".id" <<< "$b")
  # Without a last backup there is nothing to restore from.
  if [[ -z "$LASTBACKUP" || "$LASTBACKUP" == "null" ]]; then
    echo "no backup found for $VOLUMEID, skipping"
    continue
  fi
  # Let jq build the request body so that the values are JSON-escaped.
  PAYLOAD=$(jq -n \
    --arg name "$VOLUMEID" \
    --arg from "${BACKUPTARGET}?backup=${LASTBACKUP}&volume=${VOLUMEID}" \
    '{
      name: $name,
      numberOfReplicas: 3,
      accessMode: "rwo",
      encrypted: false,
      restoreVolumeRecurringJob: "ignored",
      nodeSelector: [],
      diskSelector: [],
      fromBackup: $from,
      staleReplicaTimeout: 20
    }')
  API_STATUS_CODE=$(curl -o /dev/null -su "$USERNAME:$PASSWORD" -w "%{http_code}" -X "POST" "https://longhorn.services.yolokube.de/v1/volumes" \
    -H 'Content-Type: application/json' \
    -d "$PAYLOAD")
  if [[ $API_STATUS_CODE != "200" ]]; then
    echo "could not restore $VOLUMEID"
  fi
done <<< "$(curl -su "$USERNAME:$PASSWORD" https://longhorn.services.yolokube.de/v1/backupvolumes | jq -c '.data[]')"
#######################################
# Poll the Longhorn API until a volume reports ready and detached.
# Globals:   USERNAME, PASSWORD (read) - credentials passed to curl
# Arguments: $1 - id of the volume to wait for
# Outputs:   writes a timeout message to stdout when the volume never
#            reaches the expected state
# Returns:   0 once the volume is ready and detached,
#            1 after 50 unsuccessful polls (5 seconds apart)
#######################################
wait_for_volume () {
  local counter=0
  local volume_json ready state
  while (( counter < 50 )); do
    # get volume info
    volume_json=$(curl -su "$USERNAME:$PASSWORD" "https://longhorn.services.yolokube.de/v1/volumes/$1")
    # parse json; the here-strings pass the response to jq unmodified
    ready=$(jq -r ".ready" <<< "$volume_json")
    state=$(jq -r ".state" <<< "$volume_json")
    # [[ ]] copes with empty values (e.g. a failed request), where the
    # unquoted [ ] test would print a syntax error on every poll.
    if [[ "$ready" == "true" && "$state" == "detached" ]]; then
      return 0
    fi
    counter=$((counter + 1))
    sleep 5
  done
  echo "timeout while waiting for volume $1 to become ready"
  # Signal the timeout to callers that want to react to it.
  return 1
}
# wait a while for longhorn to create the volumes from backup
echo "waiting for backups to be restored"
sleep 10

# Go through all volumes and create a PV and a PVC for each of them.
while IFS= read -r b; do
  # The here-string delivers one empty line when the volume list is empty.
  [[ -n "$b" ]] || continue
  # Here-strings hand the JSON to jq unmodified; an unquoted echo would
  # word-split and glob-expand it first.
  VOLUMEID=$(jq -r ".id" <<< "$b")
  VOLUMENAME=$(jq -r ".name" <<< "$b")
  NAMESPACE=$(jq -r ".kubernetesStatus.namespace" <<< "$b")
  PVCNAME=$(jq -r ".kubernetesStatus.pvcName" <<< "$b")

  echo "wait for $VOLUMEID to become ready"
  wait_for_volume "$VOLUMEID"
  echo "volume is ready... create PVs and PVCs"

  # Let jq build the request bodies so that the values are JSON-escaped.
  PAYLOAD=$(jq -n --arg pvName "$VOLUMENAME" '{pvName: $pvName, fsType: "ext4"}')
  API_STATUS_CODE=$(curl -o /dev/null -su "$USERNAME:$PASSWORD" -w "%{http_code}" -X "POST" "https://longhorn.services.yolokube.de/v1/volumes/$VOLUMEID?action=pvCreate" \
    -H 'Content-Type: application/json' \
    -d "$PAYLOAD")
  if [[ $API_STATUS_CODE != "200" ]]; then
    echo "could not create PV for $VOLUMEID"
  fi

  # jq prints "null" for missing fields; without a claim name and namespace
  # there is no PVC to recreate.
  if [[ -z "$PVCNAME" || "$PVCNAME" == "null" || -z "$NAMESPACE" || "$NAMESPACE" == "null" ]]; then
    echo "no PVC information stored for $VOLUMEID, skipping PVC creation"
    continue
  fi

  PAYLOAD=$(jq -n --arg pvcName "$PVCNAME" --arg namespace "$NAMESPACE" '{pvcName: $pvcName, namespace: $namespace}')
  API_STATUS_CODE=$(curl -o /dev/null -su "$USERNAME:$PASSWORD" -w "%{http_code}" -X "POST" "https://longhorn.services.yolokube.de/v1/volumes/$VOLUMEID?action=pvcCreate" \
    -H 'Content-Type: application/json' \
    -d "$PAYLOAD")
  if [[ $API_STATUS_CODE != "200" ]]; then
    echo "could not create PVC for $VOLUMEID"
  fi
done <<< "$(curl -su "$USERNAME:$PASSWORD" https://longhorn.services.yolokube.de/v1/volumes | jq -c '.data[]')"
else
# Manual mode: pause until the operator confirms with "done".
echo "Please restore the pvs and pvcs manually. After that write done: "
while :; do
  # Abort when stdin is closed before the confirmation arrived; otherwise
  # this loop would spin forever on a failing read.
  if ! read -r INPUT && [[ $INPUT != "done" ]]; then
    echo "input closed before confirmation, aborting" >&2
    exit 1
  fi
  if [[ $INPUT == "done" ]]; then
    break
  fi
done
fi
# Final phase: restore the rest of the backup, leaving out persistent volumes
# and persistent volume claims. Stop with status 1 if velero fails.
printf '%s\n' "Restore everything else"
if ! velero restore create --from-backup "$BACKUPNAME" --exclude-resources persistentvolumes,persistentvolumeclaims restore-part-3; then
  exit 1
fi