|
| 1 | +#!/bin/bash |
| 2 | + |
# Tunables: number of parallel checks, the file that receives the raw candidate
# list, and the two reference URLs (plain HTTP and HTTPS) requested via each proxy.
threads=128
corpusfile='/tmp/proxies.txt'
benchmark1='http://example.com/'
benchmark2='https://example.com/'
| 8 | + |
# Proxy sources. Each listN callback prints text containing IP:port pairs to
# stdout; anything that is not an IP:port pair is filtered out further below.

# Download one URL quietly. --fail suppresses HTTP error pages and --max-time
# keeps a single stalled source from blocking the whole run.
_fetch() {
  curl --silent --fail --max-time 30 "$1"
}

# Print the IP:port pairs found on page $1 of the geonode JSON API.
# jq -r emits the raw string, so no quote stripping is needed afterwards.
_geonode_page() {
  _fetch "https://proxylist.geonode.com/api/proxy-list?limit=1000&page=$1&sort_by=lastChecked&sort_type=desc&protocols=http%2Chttps" \
    | jq -r '.data[] | "\(.ip):\(.port)"'
}

list1() { _fetch 'https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all'; }
list2() { _geonode_page 1; }
list3() { _geonode_page 2; }
list4() { _geonode_page 3; }
list5() { _fetch 'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt'; }
list6() { _fetch 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt'; }
# The github.com/<user>/<repo>/blob/... form returns the HTML viewer page, not
# the file itself; the raw.githubusercontent.com form returns the plain list.
list7() { _fetch 'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt'; }
list8() { _fetch 'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/https.txt'; }
list9() { _fetch 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/http.txt'; }
list10() { _fetch 'https://multiproxy.org/txt_all/proxy.txt'; }
| 20 | + |
# fetch proxy server candidates
# Start from a clean file; -f keeps rm quiet when there is nothing to remove.
rm -f -- "$corpusfile"
for fetcher in list{1..10}; do
  "$fetcher"
  # A source may end without a newline. Terminate each one explicitly so the
  # first entry of the next source is not glued onto the last entry of this
  # one (which would corrupt both). Extra blank lines are harmless: only
  # IP:port matches are extracted from this file later.
  echo
done > "$corpusfile"
| 35 | + |
# we need to know how some web servers respond to eliminate bad (malicious) servers
# The reference checksums come from a direct (proxy-less) request. If that
# request fails, the pipeline would hash empty input, which is exactly what a
# dead proxy yields too, so abort instead of continuing with a useless
# reference. pipefail is enabled only inside the command substitution's
# subshell so that curl's failure becomes the substitution's exit status.
if ! checksum1=$( set -o pipefail; curl --silent "$benchmark1" | md5sum | cut -d ' ' -f1 ); then
  echo "cannot fetch $benchmark1 directly, aborting" >&2
  exit 1
fi
if ! checksum2=$( set -o pipefail; curl --silent "$benchmark2" | md5sum | cut -d ' ' -f1 ); then
  echo "cannot fetch $benchmark2 directly, aborting" >&2
  exit 1
fi
| 39 | + |
# this function will be called asynchronously and is used to validate the integrity of the proxies
#
# Arguments:
#   $1 - proxy as IP:port
#   $2 - first benchmark URL      $4 - expected md5 of its body
#   $3 - second benchmark URL     $5 - expected md5 of its body
# Outputs:
#   stdout - the proxy, only if both benchmarks came back unmodified
#   stderr - the reason a proxy was rejected
# Returns:
#   Always 0. A rejected proxy is an expected outcome, not an error, and a
#   non-zero status would change the exit status reported by xargs.
check() {
  local proxy="$1" benchmark1="$2" benchmark2="$3" checksum1="$4" checksum2="$5"
  local -a urls=("$benchmark1" "$benchmark2")
  local -a sums=("$checksum1" "$checksum2")
  local i actual

  for i in 0 1; do
    # pipefail (scoped to the substitution's subshell) surfaces curl's exit
    # status. Without it a failed or timed-out transfer is hashed like a real
    # response, and an empty body could match an empty-input reference hash.
    if ! actual=$( set -o pipefail
                   curl --connect-timeout 2 --max-time 5 --silent -x "$proxy" "${urls[i]}" \
                     | md5sum | cut -d ' ' -f1 ); then
      echo "request for ${urls[i]} failed via $proxy" >&2
      return 0
    fi
    if [[ "$actual" != "${sums[i]}" ]]; then
      echo "got $actual, expected ${sums[i]} for $proxy" >&2
      return 0
    fi
  done

  printf '%s\n' "$proxy"
}
export -f check
| 56 | + |
# iterate over each and every proxy and run check in parallel
# Order matters: extract the IP:port pairs first, then de-duplicate. Sorting
# the raw lines and running uniq after the extraction misses duplicates whose
# surrounding text differed, because they are not adjacent.
# -a makes grep treat the corpus as text even if a source returned binary junk.
grep -aEo '([0-9]+\.){3}[0-9]+:[0-9]+' "$corpusfile" \
  | sort -u \
  | shuf \
  | xargs -P "$threads" -I {} \
      bash -c 'check "$@"' _ {} "$benchmark1" "$benchmark2" "$checksum1" "$checksum2"
# Notes on the xargs call:
#   * -I already implies one input line per invocation; combining it with
#     -n 1 is contradictory, so -n is omitted.
#   * Values are handed to the child shell as positional parameters instead of
#     being spliced into the command string, so quotes or other shell
#     metacharacters in a URL cannot break or alter the command.
0 commit comments