-
-
Notifications
You must be signed in to change notification settings - Fork 998
Description
Cannot switch proxies when using the async client
The only way I have found to set a proxy is on the client itself: async with httpx.AsyncClient(proxies=proxies) as client:
I want to switch proxies inside a loop (a different proxy per request), but I cannot find a way to do it.
I tried the following approach, but it raised an error:
await client.get(url, timeout=20, proxies='http://78.135.87.159:3128')
TypeError: get() got an unexpected keyword argument 'proxy'
When I use async with httpx.AsyncClient(limits=limits,proxies=proxies) as client: instead, the crawler gets blocked after crawling a certain number of pages, because every request goes through the same proxy.
`
import asyncio
import time
import httpx
import requests
async def req(client, i, url, *, error_log='httpxerror.txt', error_delay=1):
    """Fetch ``url`` with ``client`` and print a short report of the response.

    Args:
        client: An object exposing an awaitable ``get(url, timeout=...)``
            (here an ``httpx.AsyncClient``).
        i: Sequence number of the request; only used in the printed report.
        url: Address to fetch.
        error_log: Path of the file that failed URLs are appended to.
        error_delay: Seconds to pause after a failure before re-raising.

    Returns:
        The response object returned by ``client.get``.

    Raises:
        Exception: Whatever ``client.get`` raised, re-raised unchanged after
            the failure has been printed and logged.
    """
    try:
        # The proxy is configured on the client, not per request: passing
        # ``proxies=...`` to ``get()`` raises TypeError (see the report above).
        res = await client.get(url, timeout=20)
    except Exception as e:
        print('error occurred while scraping ', url)
        with open(error_log, mode='a', encoding='utf-8') as f:
            f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + url + '\n')
        # Print the caught error itself; the original printed the Exception class.
        print(repr(e))
        # asyncio.sleep yields to the event loop; time.sleep would stall
        # every other in-flight request for the whole delay.
        await asyncio.sleep(error_delay)
        raise

    print(f'第{i}次请求,status_code = {res.status_code}')
    print(res.request.url)
    print(res.text)
    return res
async def main():
    """Fetch every index page concurrently through one proxy from the local pool.

    A single proxy address is read from the proxy-pool service, an
    ``httpx.AsyncClient`` is configured with it, and one ``req`` task is
    scheduled per page. All tasks are awaited before the client is closed.

    The proxy is fixed for the lifetime of the client. To rotate proxies,
    open a new ``httpx.AsyncClient`` for each proxy instead of passing a
    proxy argument to ``client.get``.

    Raises:
        requests.RequestException: If the proxy pool cannot be reached in
            time or answers with an error status.
        RuntimeError: If the proxy pool returns an empty address.
        Exception: Any error re-raised by ``req`` for a failed page.
    """
    PAGE_SIZE = 18
    PAGE_NUMBER = 1
    INDEX_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
    proxypool_url = 'http://127.0.0.1:5555/random'

    # Bound the wait and reject error responses, so a stalled or failing pool
    # cannot hang the script or hand back an error page as the "proxy".
    pool_response = requests.get(proxypool_url, timeout=10)
    pool_response.raise_for_status()
    proxy = pool_response.text.strip()
    if not proxy:
        raise RuntimeError(f'proxy pool at {proxypool_url} returned an empty address')
    print(proxy)
    proxies = 'http://' + proxy
    # Static proxies tried earlier instead of the pool:
    #   'http://78.135.87.159:3128', 'http://110.86.181.156:7082'
    print(proxies)

    limits = httpx.Limits(max_keepalive_connections=5, max_connections=10)
    # NOTE(review): newer httpx releases appear to rename ``proxies=`` to
    # ``proxy=`` - confirm against the installed version.
    async with httpx.AsyncClient(limits=limits, proxies=proxies) as client:
        task_list = []  # one task per index page
        for page in range(1, PAGE_NUMBER + 1):
            urllink = INDEX_URL.format(offset=PAGE_SIZE * (page - 1))
            # urllink = 'https://www.httpbin.org/get'  # handy endpoint for checking the proxy
            print(urllink)
            task_list.append(asyncio.create_task(req(client, page, urllink)))
        # Await inside the ``async with`` so the client stays open until
        # every request has finished.
        await asyncio.gather(*task_list)
# Script entry point. The pasted original read ``if name == 'main':`` because
# markdown swallowed the double underscores; that form raises NameError.
if __name__ == '__main__':
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments while the crawl runs.
    start = time.perf_counter()
    asyncio.run(main())
    end = time.perf_counter()
    print(f'time:{end - start}')