touhouwiki-mirror/filelisting.sh

84 lines
2 KiB
Bash
Executable file

#!/bin/bash
# Directory prefix (trailing slash included) under which each fetched listing
# page is stored as "<page number>.json".
PAGE_PATH="data/pageindex/Files/"
# Fetch a path on en.touhouwiki.net through the curl-impersonate container.
# Arguments: $1 - path plus query string, relative to the site root
# Outputs:   the response body on stdout
# Returns:   exit status of the docker/curl invocation
doRequest () {
  local status=0
  # No -i/-t here: nothing is sent on stdin, and a pseudo-TTY would merge
  # stderr into the captured body, rewrite line endings, and make docker
  # refuse to start when stdin is not a terminal (cron, CI, pipes).
  docker run --rm curl-impersonate /build/out/curl_ff95 --silent --compressed \
    "https://en.touhouwiki.net/${1}" || status=$?
  # Pause between consecutive requests, also after a failed one.
  sleep 1
  return "${status}"
}
# Percent-encode a string for use as a URL query value (spaces become "+").
# Arguments: $1 - raw string to encode
# Outputs:   the encoded string followed by a newline
# Returns:   exit status of python3
doUrlEncode () {
  # The value is handed over as an argument instead of being pasted into the
  # Python source, so quotes and backslashes in it are encoded literally
  # rather than being parsed as code.
  python3 -c 'import sys, urllib.parse; print(urllib.parse.quote_plus(sys.argv[1]))' "${1-}"
}
# Request one page (ailimit=500) of the wiki's allimages listing, sorted by
# ascending timestamp.
# Arguments: $1 - aicontinue token of the previous page; empty for the
#                 first page
# Outputs:   whatever doRequest prints for the query
# Returns:   1 if the token cannot be URL-encoded, otherwise doRequest's
#            status
doFilesRequest () {
  local query="api.php?action=query&format=json&list=allimages&aiprop=timestamp|url&aisort=timestamp&ailimit=500&aidir=ascending"
  local token

  # First page: no continuation token, nothing to encode.
  if [[ -z "${1-}" ]]; then
    doRequest "${query}"
    return
  fi

  # A token was supplied. If it cannot be encoded, fail instead of silently
  # falling back to the first-page query, which would return page 1 again.
  token=$(doUrlEncode "$1") || token=""
  if [[ -z "${token}" ]]; then
    printf 'doFilesRequest: could not encode continuation token\n' >&2
    return 1
  fi

  doRequest "${query}&aicontinue=${token}&*"
}
# Report whether a stored API response lists at least one image.
# Arguments: $1 - path of the JSON response file
# Outputs:   "yes" if .query.allimages has a length greater than zero,
#            "no" otherwise
isValidQueryResult () {
  local image_count
  image_count=$(jq -r '.query.allimages | length' "$1")
  if [[ "${image_count}" -gt 0 ]]; then
    echo "yes"
  else
    echo "no"
  fi
}
# Print the continuation token stored in an API response file.
# Arguments: $1 - path of the JSON response file
# Outputs:   the raw value of .continue.aicontinue as printed by jq
#            (callers in this script treat "null" as "no further page")
getQueryResultAiContinue () {
  local response_file=$1
  jq --raw-output '.continue.aicontinue' "${response_file}"
}
# Download one page of the file listing and store it, re-formatted by jq, as
# "${PAGE_PATH}<page>.json".
# Globals:   PAGE_PATH (read)
# Arguments: $1 - page number used for the file name
#            $2 - aicontinue token handed to doFilesRequest (empty for the
#                 first page)
# Outputs:   path of the stored file on success, nothing on failure
# Returns:   0 if isValidQueryResult accepted the response and it was
#            written, 1 otherwise
fetchFilesRequestPage () {
  local result_path="${PAGE_PATH}${1}.json"
  local tmp_file=".filelisting.tmp.json"
  local status=1

  doFilesRequest "${2}" > "${tmp_file}"
  if [[ $(isValidQueryResult "${tmp_file}") == "yes" ]]; then
    if jq . "${tmp_file}" > "${result_path}"; then
      echo "${result_path}"
      status=0
    else
      # Do not leave a truncated page behind; a missing page is simply
      # fetched again on the next run.
      rm -f -- "${result_path}"
    fi
  fi

  # The scratch file is removed on every path, not only on failure.
  rm -f -- "${tmp_file}"
  return "${status}"
}
# Walk the listing page by page, starting at page 1 with no continuation
# token, until a fetch yields nothing or a page carries no further token.
PAGE=1
AICONTINUE=""
mkdir -p "${PAGE_PATH}"
while true; do
  NEXT_PAGE=$((PAGE+1))
  echo "Fetch page $PAGE"
  if [[ -f "${PAGE_PATH}${PAGE}.json" && -f "${PAGE_PATH}${NEXT_PAGE}.json" ]]; then
    # This page and its successor are both on disk: reuse the stored token
    # instead of requesting the page again.
    AICONTINUE=$(getQueryResultAiContinue "${PAGE_PATH}${PAGE}.json")
  else
    # Either the page is missing, or it is the last page stored so far; in
    # both cases it is (re)downloaded.
    RESULT_PATH=$(fetchFilesRequestPage "${PAGE}" "${AICONTINUE}")
    [[ -n "$RESULT_PATH" ]] || break
    AICONTINUE=$(getQueryResultAiContinue "${RESULT_PATH}")
  fi
  # An empty or "null" token means there is no further page to request.
  case "$AICONTINUE" in
    ""|null) break ;;
  esac
  PAGE=$NEXT_PAGE
done