#!/bin/bash
#
# Mirror the pages of one touhouwiki category to local files.
#
# Usage: <script> CATEGORY [FORCE_FETCH]
#
# Reads the category-member listings in data/pageindex/<CATEGORY>/*.json,
# stores each fetched page as data/pages/<pageid>.wiki and creates a symlink
# data/pages_by_name/<title>.wiki that points at it.
#
# NOTE(review): this script reached review collapsed onto a single line and
# with a span missing between the start of isValidPageResult and the tail of
# fetchPage. Everything marked "reconstructed" below is an assumption and
# must be confirmed against the original script.
#
# `set -e` is intentionally not enabled: fetchPage returning non-zero is a
# normal outcome (page skipped) and must not abort the run.

PAGEINDEX_PATH="data/pageindex/"
PAGE_PATH="data/pages/"
PAGE_NAME_PATH="data/pages_by_name/"
FORCE_FETCH="$2"
# Spaces and slashes in the category name are replaced by underscores so the
# result can be used as a single directory name.
CATEGORY="${1//[ \/]/_}"

#######################################
# Request a path below https://en.touhouwiki.net/ and print the response.
# Arguments: $1 - path and query string relative to the site root
# Outputs:   response body on stdout
# Returns:   the status of `sleep`, NOT of curl; callers validate the body
#            instead of relying on the exit status.
#######################################
doRequest () {
  docker run --rm curl-impersonate /build/out/curl_ff95 \
    --max-time 10 --silent --compressed "https://en.touhouwiki.net/${1}"
  # Pause after every request (presumably to throttle load on the wiki).
  sleep 1
}

#######################################
# URL-encode a string with Python's urllib.parse.quote_plus.
# Arguments: $1 - string to encode
# Outputs:   encoded string on stdout
#######################################
doUrlEncode () {
  # The value is passed as an argument rather than spliced into the Python
  # source, so quotes and backslashes in it cannot break or alter the code.
  python3 -c 'import sys, urllib.parse; print(urllib.parse.quote_plus(sys.argv[1]))' "${1}"
}

#######################################
# Request index.php?curid=<id>&action=raw for a page.
# Arguments: $1 - numeric page id
# Outputs:   response body on stdout
#######################################
doPageRequest () {
  doRequest "index.php?curid=${1}&action=raw"
}

#######################################
# Decide whether a downloaded file looks like usable page content.
# Arguments: $1 - path of the downloaded file
# Outputs:   "yes" on stdout when valid, "no" otherwise
#######################################
isValidPageResult () {
  # NOTE(review): reconstructed. The source only shows that the file must be
  # non-empty and that a grep result is tested; the grep pattern was lost.
  # An HTML doctype marker (i.e. an error page instead of page text) is
  # assumed here - confirm against the original.
  if [[ -n "$(<"$1")" ]] && ! grep -q -- '<!DOCTYPE html' "$1"; then
    echo "yes"
  else
    echo "no"
  fi
}

#######################################
# Download one page into ${PAGE_PATH}<pageid>.wiki.
# Globals:   PAGE_PATH, FORCE_FETCH (read)
# Arguments: $1 - page id
#            $2 - page title (not used by the visible code)
# Outputs:   path of the stored page on stdout (nothing else may be written
#            to stdout here, the caller captures it)
# Returns:   0 when the page is available locally, 1 otherwise
#######################################
fetchPage () {
  local page_id="$1"
  local tmp_file=".mirror.tmp.wiki"
  # NOTE(review): reconstructed. The target path is inferred from the symlink
  # target "../pages/<pageid>.wiki" used by the main loop.
  local result_path="${PAGE_PATH}${page_id}.wiki"

  # NOTE(review): reconstructed. FORCE_FETCH is read from $2 but its use was
  # in the lost span; assumed meaning: any non-empty value re-downloads pages
  # that already exist locally.
  if [[ -z "${FORCE_FETCH}" && -s "${result_path}" ]]; then
    printf '%s\n' "${result_path}"
    return 0
  fi

  doPageRequest "${page_id}" > "${tmp_file}"
  if [[ $(isValidPageResult "${tmp_file}") == "yes" ]]; then
    # Only report success when the file really reached its destination.
    mv -- "${tmp_file}" "${result_path}" || return 1
    echo "${result_path}"
    return 0
  fi
  return 1
}

#######################################
# Walk every index file of the category and mirror the listed pages.
# Globals: PAGEINDEX_PATH, CATEGORY, PAGE_PATH, PAGE_NAME_PATH (read)
#######################################
main () {
  local f item pageid title result_path linkname

  for f in "${PAGEINDEX_PATH}${CATEGORY}/"*.json; do
    # An unmatched glob stays a literal pattern; skip it instead of handing
    # a non-existent file to jq.
    [[ -e "${f}" ]] || continue
    mkdir -p -- "${PAGE_PATH}" "${PAGE_NAME_PATH}" || return 1

    echo "working on page ${f}"
    jq -c -r '.query.categorymembers[]' "$f" | while IFS= read -r item; do
      pageid=$(jq -r '.pageid' <<< "$item")
      title=$(jq -r '.title' <<< "$item")

      # Only regular pages are mirrored.
      case "${title}" in
        Category:*|User:*|File:*) continue ;;
      esac

      echo "Fetch page ${pageid} title $title"
      result_path=$(fetchPage "${pageid}" "${title}")
      if [[ -n "${result_path}" ]]; then
        linkname="${PAGE_NAME_PATH}${title//[ \/]/_}.wiki"
        # -f replaces an existing link, -n keeps ln from following it.
        # The relative target assumes PAGE_PATH and PAGE_NAME_PATH are
        # sibling directories named as configured above.
        ln -sfn -- "../pages/${pageid}.wiki" "${linkname}"
      fi
    done
  done
}

main "$@"