#!/bin/bash
#
# Downloads the file listing of en.touhouwiki.net through the MediaWiki
# `allimages` API, one result page at a time, and stores each result page as
# pretty-printed JSON under PAGE_PATH.
#
# Requires: docker (with a `curl-impersonate` image), python3 and jq.

# Directory (trailing slash required) that receives the <page>.json files.
PAGE_PATH="data/pageindex/Files/"

#######################################
# Performs a GET request against en.touhouwiki.net using the curl_ff95 binary
# of the `curl-impersonate` docker image.
# Arguments: $1 - path and query string appended to https://en.touhouwiki.net/
# Outputs:   the response body on stdout
# Returns:   the exit status of `docker run` (i.e. of curl inside the container)
#######################################
doRequest() {
  local status=0

  # No -i/-t here: the output is captured by callers, and allocating a TTY
  # makes docker fail when stdin is not a terminal (cron, CI, pipes) and routes
  # the response through a pseudo-terminal.
  docker run --rm curl-impersonate /build/out/curl_ff95 \
    --silent --compressed "https://en.touhouwiki.net/${1}" || status=$?

  # Pause for one second after every request (presumably to go easy on the
  # server), then report the request's status rather than sleep's.
  sleep 1
  return "$status"
}

#######################################
# URL-encodes a string for use as a query-string value, using Python's
# urllib.parse.quote_plus (spaces become "+", "|" becomes "%7C", ...).
# Arguments: $1 - raw string (may be empty or omitted)
# Outputs:   the encoded string followed by a newline on stdout
# Returns:   the exit status of python3
#######################################
doUrlEncode() {
  # The value is handed over through argv instead of being spliced into the
  # Python source, so quotes and backslashes in it cannot break or alter the
  # program.
  python3 -c 'import sys, urllib.parse; print(urllib.parse.quote_plus(sys.argv[1]))' "${1-}"
}

#######################################
# Requests one page (up to 500 entries, oldest first) of the wiki's
# `allimages` listing.
# Arguments: $1 - `aicontinue` token of the page to fetch; empty or omitted
#                 requests the first page
# Outputs:   the raw API response on stdout
# Returns:   doRequest's status, or 1 when a non-empty token could not be
#            URL-encoded
#######################################
doFilesRequest() {
  local query="api.php?action=query&format=json&list=allimages&aiprop=timestamp|url&aisort=timestamp&ailimit=500&aidir=ascending"
  local token="${1-}"
  local encoded=""

  if [[ -n "$token" ]]; then
    encoded=$(doUrlEncode "$token") || encoded=""
    # Without an encoded token the request would silently start over at the
    # first page, so fail instead of returning the wrong page.
    if [[ -z "$encoded" ]]; then
      printf 'doFilesRequest: could not URL-encode continuation token: %s\n' "$token" >&2
      return 1
    fi
    query+="&aicontinue=${encoded}&*"
  fi

  doRequest "$query"
}

isValidQueryResult () {
|
|
if [[ $(jq -r '.query.allimages | length' "$1") -gt 0 ]]; then
|
|
echo "yes"
|
|
return
|
|
fi
|
|
echo "no"
|
|
return
|
|
}
|
|
|
|
#######################################
# Extracts the continuation token from a saved API response.
# Arguments: $1 - path to a JSON file holding an API response
# Outputs:   the value of .continue.aicontinue on stdout; jq prints the
#            literal word "null" when the response has no such field
# Returns:   the exit status of jq
#######################################
getQueryResultAiContinue() {
  jq -r '.continue.aicontinue' "$1"
}

#######################################
# Fetches one listing page and, if it contains entries, stores it
# pretty-printed as ${PAGE_PATH}<page>.json.
# Globals:   PAGE_PATH (read)
# Arguments: $1 - page number, used as the file name
#            $2 - `aicontinue` token for that page (empty for the first page)
# Outputs:   the path of the stored file on stdout, only on success
# Returns:   0 when the page was stored, 1 otherwise
#######################################
fetchFilesRequestPage() {
  local page="$1"
  local token="${2-}"
  local result_path="${PAGE_PATH}${page}.json"
  local tmp_file

  # A private temp file instead of a fixed name in the working directory; it
  # is removed on every exit path below.
  tmp_file=$(mktemp) || return 1

  # The request's own status is not checked; the content check decides.
  doFilesRequest "$token" > "$tmp_file"

  if [[ $(isValidQueryResult "$tmp_file") == "yes" ]] \
    && jq . "$tmp_file" > "$result_path"; then
    rm -f -- "$tmp_file"
    echo "$result_path"
    return 0
  fi

  rm -f -- "$tmp_file"
  return 1
}

#######################################
# Walks the listing page by page until the API reports no further
# continuation token or a page cannot be fetched.
#
# A page is fetched when its file is missing, or when it is the last page
# stored so far (the following page's file is missing), in which case it is
# fetched again. For every other stored page only its continuation token is
# read from disk, so an interrupted run resumes where it stopped.
# Globals:   PAGE_PATH (read)
# Outputs:   one "Fetch page N" progress line per page on stdout
#######################################
main() {
  local page=1
  local next_page
  local aicontinue=""
  local result_path

  mkdir -p "${PAGE_PATH}"

  while true; do
    next_page=$((page + 1))

    echo "Fetch page $page"

    if [[ ! -f "${PAGE_PATH}${page}.json" || ! -f "${PAGE_PATH}${next_page}.json" ]]; then
      # Missing page, or last stored page: fetch it.
      result_path=$(fetchFilesRequestPage "${page}" "${aicontinue}")
      if [[ "$result_path" == "" ]]; then
        printf 'Could not fetch page %s, stopping.\n' "$page" >&2
        break
      fi
      aicontinue=$(getQueryResultAiContinue "${result_path}")
    else
      # Already stored and followed by another page: reuse its token.
      aicontinue=$(getQueryResultAiContinue "${PAGE_PATH}${page}.json")
    fi

    # jq prints "null" when the response carries no continuation token.
    if [[ "$aicontinue" == "" || "$aicontinue" == "null" ]]; then
      break
    fi

    page=$((page + 1))
  done
}

main "$@"