#!/bin/bash
#
# Downloads the files listed in the page-index JSON files.
#
# For every JSON file in PAGEINDEX_PATH, each entry of `.query.allimages[]`
# whose title starts with "File:" is downloaded through the curl_ff95 binary
# of a `curl-impersonate` Docker container into PAGE_PATH, and a symlink named
# after the title is created in PAGE_NAME_PATH.
#
# Usage: <script> [FORCE_FETCH]
#
# Requires: docker (with a `curl-impersonate` image), jq.
#
# `set -e` is deliberately not used: fetchFile returns non-zero for a failed
# download, which is an expected condition and must not abort the run.

PAGEINDEX_PATH="data/pageindex/Files/"
PAGE_PATH="data/files/"
PAGE_NAME_PATH="data/pages_by_name/"
FORCE_FETCH="${1:-}"

CONTAINER_NAME="curl-impersonate_files"
TMP_FILE=".files.tmp"

# Stops and removes the helper container.
# Best effort on purpose: the container may not exist, and because it is
# started with --rm the explicit `docker rm` usually has nothing left to do.
removeContainer () {
    docker stop "${CONTAINER_NAME}" >/dev/null 2>&1
    docker rm "${CONTAINER_NAME}" >/dev/null 2>&1
    return 0
}

# Exit handler: tears down the container and drops a leftover temp download.
cleanup () {
    removeContainer
    rm -f -- "${TMP_FILE}"
}

# Fetches a URL with curl_ff95 inside the helper container.
# Arguments: $1 - URL to request
# Outputs:   the response body on stdout
# Returns:   the exit status of `docker exec`
doFullRequest () {
    # stdin is detached so the call can never consume the input of a
    # surrounding `while read` loop.
    docker exec "${CONTAINER_NAME}" /build/out/curl_ff95 \
        --max-time 10 --silent --compressed "${1}" < /dev/null
}

# URL-encodes a string with Python's urllib.parse.quote_plus.
# Arguments: $1 - string to encode
# Outputs:   the encoded string on stdout
doUrlEncode () {
    # The value is handed over as an argument instead of being pasted into the
    # Python source, so quotes and backslashes in it cannot break (or inject
    # into) the Python program.
    python3 -c 'import sys, urllib.parse; print(urllib.parse.quote_plus(sys.argv[1]))' "${1}"
}

# Prints "yes" when the downloaded file looks usable, "no" otherwise.
# Arguments: $1 - path of the downloaded file
#
# NOTE(review): the original body of this function was lost when the file was
# mangled; only `if [[ $(grep '` survived, so the grep pattern it checked is
# unknown. This stand-in only rejects missing/empty downloads (e.g. a request
# that timed out). TODO: restore the original content check.
isValidPageResult () {
    if [[ -s "${1}" ]]; then
        echo "yes"
    else
        echo "no"
    fi
}

# Downloads one file into PAGE_PATH.
# Arguments: $1 - file name to store the download under (inside PAGE_PATH)
#            $2 - URL to download
# Outputs:   the stored path on stdout when the file is available
# Returns:   0 when the file is available, 1 otherwise
#
# NOTE(review): the head of this function was lost together with the end of
# isValidPageResult; the surviving tail starts at the redirect into the temp
# file. FORCE_FETCH is not referenced anywhere else in the script, so it is
# presumed to have been used here to skip files that are already present.
# Assumed semantics: any non-empty first script argument forces a re-download.
# TODO: confirm against the original.
fetchFile () {
    local target="${PAGE_PATH}${1}"

    if [[ -z "${FORCE_FETCH}" && -s "${target}" ]]; then
        echo "${target}"
        return 0
    fi

    doFullRequest "${2}" > "${TMP_FILE}"
    if [[ $(isValidPageResult "${TMP_FILE}") == "yes" ]]; then
        # Only publish the download once it has been validated.
        mv -- "${TMP_FILE}" "${target}" || return 1
        echo "${target}"
        return 0
    fi
    return 1
}

# (Re)creates the by-name symlink for a stored file.
# Arguments: $1 - file name inside PAGE_PATH
#            $2 - page title; spaces and slashes become underscores
linkByTitle () {
    local title="${2}"
    local linkname="${PAGE_NAME_PATH}${title//[ \/]/_}"

    rm -f -- "${linkname}"
    # Relative target: resolved from the directory that holds the link.
    ln -s -- "../files/${1}" "${linkname}"
}

# Handles one `.query.allimages[]` entry.
# Arguments: $1 - the entry as a single-line JSON object
processItem () {
    local item="${1}"
    local shorturl name title url ext pageid stored

    shorturl=$(jq -r '.descriptionshorturl' <<< "${item}")
    name=$(jq -r '.name' <<< "${item}")
    title=$(jq -r '.title' <<< "${item}")
    url=$(jq -r '.url' <<< "${item}")

    if [[ "${title}" != File:* ]]; then
        return 0
    fi

    ext="${name##*.}"
    # The page id is whatever follows the last '=' of the short URL.
    pageid="${shorturl##*=}"

    # Without a usable page id (0, "null", or a URL with no '=') the file is
    # stored under its own name; otherwise different files would collide on
    # the same bogus "<pageid>.<ext>" name.
    if [[ "${pageid}" =~ ^[1-9][0-9]*$ ]]; then
        echo "Fetch file ${pageid} title $title extension $ext url $url"
        stored="${pageid}.${ext}"
    else
        echo "Fetch file name $name title $title extension $ext url $url"
        stored="${name}"
    fi

    if [[ -n "$(fetchFile "${stored}" "${url}")" ]]; then
        linkByTitle "${stored}" "${title}"
    fi
}

# Entry point: starts the container, walks the page index, cleans up on exit.
main () {
    local f item

    # Get rid of a container left behind by an earlier, interrupted run.
    removeContainer

    # -it keeps the container's bash waiting on its stdin, so the container
    # stays up for the later `docker exec` calls.
    if ! docker run -it -d --rm --name "${CONTAINER_NAME}" curl-impersonate bash; then
        echo "failed to start container ${CONTAINER_NAME}" >&2
        exit 1
    fi

    # Tear the container down on every exit path, including Ctrl-C.
    trap cleanup EXIT
    trap 'exit 130' INT TERM

    mkdir -p -- "${PAGE_PATH}" "${PAGE_NAME_PATH}"

    for f in "${PAGEINDEX_PATH}"*.json; do
        # An unmatched glob stays literal; skip it instead of feeding it to jq.
        [[ -e "${f}" ]] || continue
        echo "working on page ${f}"
        jq -c -r '.query.allimages[]' "${f}" | while IFS= read -r item; do
            processItem "${item}"
        done
    done
}

main