# proxies/pull_lists.py
#
# 43 lines
# 1.3 KiB
# Python

from pathlib import Path
from re import compile

from requests import RequestException, request
from tqdm import tqdm
# Matches one proxy entry at the very start of the searched string:
#   [socks4|socks5 "://"] [username ":" password "@"] a.b.c.d ":" port
# The whole entry is captured as group 1 inside a lookahead and then consumed
# through the backreference, so group(0) is always identical to group(1).
proxy_re = compile(
    r"(?=^("                                    # lookahead, anchor, open group 1
    r"(?:"                                      # optional prefix (scheme and/or credentials)
    r"(?:(?P<protocol>socks[4-5]):\/\/)?"       # socks4:// or socks5://
    r"(?:(?P<username>[a-z0-9A-Z]+):(?P<password>[a-z0-9A-Z]+)@)?"  # user:pass@
    r")?"
    r"(?P<ipaddr>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # dotted-quad address
    r":"
    r"(?P<port>[^\n\s]+)"                       # everything up to the next whitespace
    r"))"                                       # close group 1 and the lookahead
    r"\1"                                       # consume exactly what group 1 captured
)

# Input file: one proxy-list URL per line.
LISTS = Path("./proxy_lists.txt")
# Output file that the collected proxies are appended to.
PULLED = Path("./pulled_proxies.txt")
# De-duplicated proxy strings gathered during this run.
PROXIES = set()
# Seconds to wait on a list server before giving up on that URL; without a
# timeout a single stalled host would hang the whole run.
REQUEST_TIMEOUT = 30


def _proxies_from_json(entries):
    """Yield ``ip:port`` strings from a decoded JSON list of proxy records.

    Each record is expected to be a mapping with ``"ip"`` and ``"port"``
    keys. Records of any other shape are skipped, so one malformed entry
    does not discard the rest of the list.
    """
    for entry in entries:
        try:
            proxy = f"{entry['ip']}:{entry['port']}"
        except (KeyError, TypeError):
            continue
        yield proxy


def _proxies_from_text(text):
    """Yield the ``proxy_re`` match of every line of *text* that has one."""
    # splitlines() already removes line terminators, so no extra stripping.
    for row in text.splitlines():
        found = proxy_re.search(row)
        if found:
            yield found.group(0)


# Download every proxy list named in LISTS (one URL per line) and collect the
# proxies it advertises into PROXIES. A response whose body decodes to a JSON
# list is read as proxy records; anything else is scanned line by line.
with LISTS.open("r") as list_file:
    for raw_url in tqdm(list_file):
        url = raw_url.strip()
        # Blank lines and "#" comments are not URLs; requesting them would
        # raise and abort the run.
        if not url or url.startswith("#"):
            continue
        try:
            with request("GET", url, timeout=REQUEST_TIMEOUT) as resp:
                # Do not parse an HTTP error page as if it were a proxy list.
                resp.raise_for_status()
                try:
                    payload = resp.json()
                except ValueError:
                    # Body is not JSON; treat it as plain text below.
                    payload = None
                if isinstance(payload, list):
                    PROXIES.update(_proxies_from_json(payload))
                else:
                    PROXIES.update(_proxies_from_text(resp.text))
        except RequestException as err:
            # One unreachable or misbehaving source must not stop the others;
            # tqdm.write keeps the progress bar intact.
            tqdm.write(f"Skipping {url}: {err}")
# Append every collected proxy to the output file, one per line, showing a
# progress bar while the set is consumed. Existing file contents are kept.
with PULLED.open("a+") as out_file:
    out_file.writelines(f"{entry}\n" for entry in tqdm(PROXIES))
# https://raw.githubusercontent.com/User-R3X/proxy-list/main/archive/all.txt
# https://raw.githubusercontent.com/jetkai/proxy-list/main/archive/txt/working-proxies-history.txt