touhouwiki-mirror/files.sh

84 lines
2.1 KiB
Bash
Executable file

#!/bin/bash
# Downloads the files listed in the page-index JSON documents and exposes
# each one under its page title through a symlink.
#
# Usage: files.sh [yes]
#   Passing "yes" as the first argument re-downloads files that are already
#   present locally.

# Directory layout, relative to the working directory.
PAGEINDEX_PATH='data/pageindex/Files/'   # input: page-index JSON files
PAGE_PATH='data/files/'                  # output: downloaded files
PAGE_NAME_PATH='data/pages_by_name/'     # output: symlinks named after titles

# First positional argument; compared against "yes" when deciding whether an
# existing local file may be reused.
FORCE_FETCH="${1}"

# Get rid of a container left over from an earlier run, then start a fresh
# detached one from the curl-impersonate image with bash as its command.
docker stop curl-impersonate_files
docker rm curl-impersonate_files
docker run --interactive --tty --detach --rm \
  --name curl-impersonate_files curl-impersonate bash
#######################################
# Request a URL with the curl_ff95 binary inside the curl-impersonate_files
# container (presumably a Firefox-95 impersonating curl build - confirm
# against the image).
# Arguments: $1 - URL to request
# Outputs:   whatever the curl binary writes to stdout
# Returns:   exit status of `docker exec`
#######################################
doFullRequest () {
  local target_url="${1}"
  # 10 second overall limit, no progress output, compressed transfer allowed.
  local -a curl_cmd=(/build/out/curl_ff95 --max-time 10 --silent --compressed)

  docker exec curl-impersonate_files "${curl_cmd[@]}" "${target_url}"
}
#######################################
# Percent-encode a string for use in a URL query (spaces become "+").
# The value is handed to Python as a command-line argument instead of being
# pasted into the Python source, so quotes and backslashes in the input are
# encoded literally rather than being parsed as code or escape sequences.
# Arguments: $1 - string to encode
# Outputs:   the encoded string followed by a newline
# Returns:   exit status of python3
#######################################
doUrlEncode () {
  # os.fsencode() recovers the exact bytes of the argument, so the result
  # does not depend on the locale Python decoded argv with.
  python3 -c 'import os, sys, urllib.parse; print(urllib.parse.quote_plus(os.fsencode(sys.argv[1])))' "${1}"
}
#######################################
# Decide whether a downloaded file looks like real content rather than an
# HTML page or a failed download.
# Arguments: $1 - path of the file to inspect
# Outputs:   "yes" when the file is non-empty and contains no "<html",
#            otherwise "no"
# Returns:   0
#######################################
isValidPageResult () {
  local candidate="$1"

  # A missing or zero-byte file is what a failed transfer leaves behind; it
  # must not be mistaken for content just because it holds no HTML.
  if [[ ! -s "${candidate}" ]]; then
    echo "no"
    return
  fi

  grep -q -- '<html' "${candidate}"
  case $? in
    1) echo "yes" ;;  # grep ran and found no HTML marker
    *) echo "no" ;;   # marker found (0) or the file could not be read (>1)
  esac
  return
}
#######################################
# Download a URL into PAGE_PATH unless the file is already there.
# Globals:   PAGE_PATH (read), FORCE_FETCH (read)
# Arguments: $1 - file name to store the download under, inside PAGE_PATH
#            $2 - URL to download
# Outputs:   the path of the stored file on success, nothing on failure
# Returns:   0 when the file is available locally, 1 otherwise
#######################################
fetchFile () {
  local result_path="${PAGE_PATH}${1}"
  local tmp_path=".files.tmp"

  # Reuse an existing download unless a re-fetch was requested.
  if [[ -f "${result_path}" && "${FORCE_FETCH}" != "yes" ]]; then
    echo "${result_path}"
    return 0
  fi

  # The download is promoted only when the transfer itself succeeded, the
  # content passes validation and the move worked. Checking the transfer
  # status keeps timed-out or truncated downloads out of the mirror, where
  # they would otherwise be treated as complete on every later run.
  if doFullRequest "${2}" > "${tmp_path}" \
    && [[ "$(isValidPageResult "${tmp_path}")" == "yes" ]] \
    && mv -- "${tmp_path}" "${result_path}"; then
    echo "${result_path}"
    return 0
  fi

  # Do not leave a rejected download lying around.
  rm -f -- "${tmp_path}"
  return 1
}
#######################################
# Mirror every File: entry listed in one page-index JSON document and point
# a symlink named after the entry's title at the stored file.
# Globals:   PAGE_NAME_PATH (read)
# Arguments: $1 - path of a JSON file with a .query.allimages array
# Outputs:   one progress line per processed entry
# Returns:   exit status of the jq | while pipeline
#######################################
mirrorIndexFile () {
  local index_file="$1"
  local item pageid name title url ext stored_name fetched_path linkname

  jq -c -r '.query.allimages[]' "${index_file}" | while IFS= read -r item; do
    pageid=$(jq -r '.descriptionshorturl' <<< "${item}")
    name=$(jq -r '.name' <<< "${item}")
    title=$(jq -r '.title' <<< "${item}")
    url=$(jq -r '.url' <<< "${item}")

    # Entries whose title is not in the File: namespace are skipped.
    if [[ "${title}" != File:* ]]; then
      continue
    fi

    ext="${name##*.}"
    # Keep what follows the last "=" of the short URL (presumably a
    # "...?curid=<page id>" link - confirm against the index data).
    pageid="${pageid##*=}"

    # An id of "0" cannot identify the file, so the original name is used
    # as the storage name; otherwise the file is stored as <id>.<ext>.
    if [[ "${pageid}" == "0" ]]; then
      stored_name="${name}"
      echo "Fetch file name $name title $title extension $ext url $url"
    else
      stored_name="${pageid}.${ext}"
      echo "Fetch file ${pageid} title $title extension $ext url $url"
    fi

    fetched_path=$(fetchFile "${stored_name}" "${url}")
    if [[ -n "${fetched_path}" ]]; then
      # Spaces and slashes in the title become underscores in the link name.
      linkname="${PAGE_NAME_PATH}${title//[ \/]/_}"
      # -f replaces a link left by an earlier run; -n keeps ln from
      # descending into an existing link that points at a directory.
      ln -sfn -- "../files/${stored_name}" "${linkname}"
    fi
  done
}

for f in "${PAGEINDEX_PATH}"*.json; do
  # Without any match the glob stays a literal pattern; skip it instead of
  # handing a non-existent file to jq.
  [[ -e "${f}" ]] || continue
  echo "working on page ${f}"
  mirrorIndexFile "${f}"
done

# Shut the helper container down again.
docker stop curl-impersonate_files
docker rm curl-impersonate_files