#!/bin/bash
# Mirror the VPS repository to a local directory
# Usage: vpsmirror.sh LOCAL_DIR [--arch ARCH] [--since DATE]

# Abort as soon as a command fails without being handled.
set -o errexit

# Defaults; both can be overridden from the command line.
ARCH="x86_64"
LOCAL_DIR=""

# Transfer settings used by download(): how many extra attempts it makes,
# and the command line (program plus options) it runs for each attempt.
DOWNLOAD_RETRIES=2
DOWNLOAD="curl -L -s --retry 10 --connect-timeout 30 -f --create-dirs"

# Download a URL into a file, retrying on transient failures.
#
# Globals:   DOWNLOAD (read)         - command line to run, word-split on use
#            DOWNLOAD_RETRIES (read) - number of additional attempts
# Arguments: $1 - URL to fetch
#            $2 - path of the output file
# Outputs:   progress and failure messages on stdout
# Returns:   0 on success, otherwise the exit code of the last attempt
#            (1 if no attempt was made at all)
download() {
    local URL=$1
    local OUTPUT=$2
    local TRY
    # Kept local so a caller's own STATUS variable is never clobbered.
    # Starts non-zero so "no attempt made" is not reported as success.
    local STATUS=1
    echo "Download ${URL}"
    for ((TRY=0; TRY<=DOWNLOAD_RETRIES; TRY++)); do
        if [[ $TRY -ne 0 ]]; then
            echo "Downloading $URL... (retry $TRY)"
        fi
        STATUS=0
        # DOWNLOAD is intentionally unquoted: it holds a command plus its
        # options and must be split into separate words.
        $DOWNLOAD "$URL" -o "$OUTPUT" || STATUS=$?
        if [[ $STATUS -eq 0 ]]; then
            return 0
        elif [[ $STATUS -eq 22 ]]; then
            # Do not retry on HTTP error
            break
        fi
    done
    echo "Download failed: $DOWNLOAD $URL -o $OUTPUT [exit code $STATUS]"
    return "$STATUS"
}

# Print the command synopsis and the list of supported options to stdout.
usage() {
    printf '%s\n' \
        "vpsmirror.sh LOCAL_DIR [--arch ARCH] [--since DATE]" \
        "Options:" \
        "  --arch              Mirror this arch instead of x86_64 (can be: x86_64, i386)" \
        "  --since DATE        Skip older VPS than DATE (format: YYMMDD)" \
        "  -h, --help          Show this help"
}

# Parse options:
#   LOCAL_DIR        first positional argument (required)
#   --arch ARCH      overrides ARCH
#   --since DATE     sets SINCE (YYMMDD)
#   -h, --help       prints usage and exits 0

# Print an error message followed by the usage text, then exit 1.
usage_error() {
    echo "$1"
    echo; usage; exit 1
}

# Start from a known state so a SINCE inherited from the environment
# cannot silently act as a filter.
SINCE=

# Loop on the argument count rather than on "$1" being non-empty, so an
# empty argument does not silently end option parsing.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --arch | --since )
            # Without this check the extra shift below fails and `set -e`
            # terminates the script without any message.
            if [[ $# -lt 2 ]]; then
                usage_error "Option ${1} requires a value."
            fi
            if [[ $1 == --arch ]]; then
                ARCH=$2
            else
                SINCE=$2
            fi
            shift ;;
        -h | --help )           usage; exit 0 ;;
        * )
            if [[ -z ${LOCAL_DIR} ]]; then
                LOCAL_DIR=$1
            else
                usage_error "Unsupported option ${1}."
            fi ;;
    esac
    shift
done
if [[ -z ${LOCAL_DIR} ]]; then
    usage_error "Missing required parameter: LOCAL_DIR"
fi
# SINCE is later compared numerically against dates taken from file names;
# anything other than six digits would be misinterpreted there.
if [[ -n ${SINCE} && ! ${SINCE} =~ ^[0-9]{6}$ ]]; then
    usage_error "Invalid --since value '${SINCE}' (expected format: YYMMDD)."
fi

# Resolve the per-architecture mirror directory and the matching remote URL.
LOCAL_DIR="${LOCAL_DIR}/${ARCH}/vps9"
REPO_URL="https://linux-av.u.avcdn.net/linux-av/avast/${ARCH}/vps9"


# Fetch the remote index next to the local one. Under `set -e` a failed
# download terminates the script here.
download "${REPO_URL}/index.html" "${LOCAL_DIR}/index.html.new"

# First run: compare against an empty index so every entry counts as added.
if [[ ! -f "${LOCAL_DIR}/index.html" ]]; then
    touch "${LOCAL_DIR}/index.html"
fi

# diff exits with 0 when the files match, 1 when they differ and a larger
# value on trouble (e.g. an unreadable file). Trouble must not be mistaken
# for "differs": the diff output would be empty or partial, nothing would be
# downloaded, and the new index would still replace the old one.
DIFF_STATUS=0
INDEX_DIFF=$(diff -d -b "${LOCAL_DIR}/index.html" "${LOCAL_DIR}/index.html.new") || DIFF_STATUS=$?
if [[ ${DIFF_STATUS} -eq 0 ]]; then
    echo "The mirror is up to date."
    rm "${LOCAL_DIR}/index.html.new"
    exit 0
elif [[ ${DIFF_STATUS} -gt 1 ]]; then
    echo "Cannot compare ${LOCAL_DIR}/index.html with ${LOCAL_DIR}/index.html.new [diff exit code ${DIFF_STATUS}]" >&2
    exit 1
fi

# Extract the link targets from the diff, one per line: lines prefixed with
# "<" exist only in the old index, lines prefixed with ">" only in the new.
REMOVED="$(sed -n -E 's#^<\s*<a href="([^"]+)".*$#\1#p' <<<"${INDEX_DIFF}")"
ADDED="$(sed -n -E 's#^>\s*<a href="([^"]+)".*$#\1#p' <<<"${INDEX_DIFF}")"

# download added and modified files

# Print the YYMMDD date embedded in the VPS file name $1, or nothing when
# the name matches neither vps9YYMMDDNN.inf / .ful nor
# vps9_YYMMDDNN_NNNNNNNN.dif.
vps_file_date() {
    local FULL_RE='vps9([0-9]{6})[0-9]{2}\.(inf|ful)'
    local DIFF_RE='vps9_([0-9]{6})[0-9]{2}_[0-9]{8}\.dif'
    if [[ $1 =~ ${FULL_RE} || $1 =~ ${DIFF_RE} ]]; then
        echo "${BASH_REMATCH[1]}"
    fi
}

# Succeed when date $1 is strictly older than date $2. Both must consist of
# digits only; an empty or malformed value never counts as older.
# The 10# prefix forces base 10: a date such as 090101 would otherwise be
# read as an (invalid) octal number because of its leading zero.
vps_date_older() {
    [[ $1 =~ ^[0-9]+$ && $2 =~ ^[0-9]+$ ]] && (( 10#$1 < 10#$2 ))
}

if [[ -n ${SINCE:-} && ! ${SINCE} =~ ^[0-9]+$ ]]; then
    echo "Ignoring --since ${SINCE}: expected a numeric date (format: YYMMDD)" >&2
fi

SKIPPED=0
# ADDED holds one name per line. Reading it line by line (on fd 3, so stdin
# stays untouched for the commands in the loop) avoids the word splitting
# and glob expansion an unquoted ${ADDED} would be subject to.
while IFS= read -r -u 3 FILE; do
    [[ -n ${FILE} ]] || continue
    # The names come from the remote index: never follow an absolute path
    # or a ".." component out of the mirror directory.
    case "/${FILE}/" in
        //* | */../* )
            echo "-- Unsafe file name in index, skipped: ${FILE}"
            continue ;;
    esac
    DATE=$(vps_file_date "${FILE}")
    if vps_date_older "${DATE}" "${SINCE:-}"; then
        SKIPPED=$((SKIPPED + 1))
        continue
    elif [[ ${SKIPPED} -gt 0 ]]; then
        echo "Skipped ${SKIPPED} files older than $SINCE"
        SKIPPED=0
    fi
    URL="${REPO_URL}/${FILE}"
    # Download to a temporary name first so an interrupted transfer never
    # leaves a truncated file under the final name.
    if download "${URL}" "${LOCAL_DIR}/${FILE}.tmp" ; then
        mv -- "${LOCAL_DIR}/${FILE}.tmp" "${LOCAL_DIR}/${FILE}"
    else
        CURL_ERR=$?
        if [[ ${CURL_ERR} -eq 22 ]]; then
            echo "-- Not found, skipped (curl error ${CURL_ERR})"
        else
            echo "-- Failed (curl error ${CURL_ERR})"
            exit 1
        fi
    fi
done 3<<<"${ADDED}"
# Report a run of skipped files that reached the end of the list.
if [[ ${SKIPPED} -gt 0 ]]; then
    echo "Skipped ${SKIPPED} files older than $SINCE"
    SKIPPED=0
fi

# delete removed files

# Succeed when $1 is exactly equal to one whole line of the newline-separated
# list $2. The name is matched as a fixed string, so characters such as "."
# in a file name are not treated as regular-expression wildcards.
list_has_line() {
    grep -qxF -- "$1" <<<"$2"
}

# REMOVED holds one name per line. Reading it line by line (on fd 3) avoids
# the word splitting and glob expansion an unquoted ${REMOVED} would be
# subject to.
while IFS= read -r -u 3 FILE; do
    [[ -n ${FILE} ]] || continue
    # The names come from the remote index: never let an absolute path or a
    # ".." component delete something outside the mirror directory.
    case "/${FILE}/" in
        //* | */../* )
            echo "-- Unsafe file name in index, not removed: ${FILE}"
            continue ;;
    esac
    # if it's in both REMOVED and ADDED, it was modified - do not delete
    if ! list_has_line "${FILE}" "${ADDED}"; then
        echo "Remove ${FILE}"
        rm -f -- "${LOCAL_DIR}/${FILE}"
    fi
done 3<<<"${REMOVED}"

# update local index
# (If something failed above, the old index will stay in place, so the sync can be repeated.)
mv -- "${LOCAL_DIR}/index.html.new" "${LOCAL_DIR}/index.html"
