Skip to content

Cannot switch proxies in a loop #2002

@6897889

Description

@6897889

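Cannot switch proxies when using the async client.
The only way I have found to set a proxy is `async with httpx.AsyncClient(proxies=proxies) as client:`, which fixes the proxy for the whole client.
I want to use a different proxy on each iteration of a loop, but I cannot find a way to switch proxies inside the loop.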

I tried passing the proxy on the request itself, but it raised an error:
await client.get(url, timeout=20, proxies='http://78.135.87.159:3128')
TypeError: get() got an unexpected keyword argument 'proxy'

When I use `async with httpx.AsyncClient(limits=limits, proxies=proxies) as client:` instead, the crawler gets blocked after crawling a certain number of pages.

`
import asyncio
import time
import httpx
import requests

async def req(client, i, url):
    """Fetch ``url`` through ``client`` and print a summary of the response.

    Args:
        client: Object exposing an awaitable ``get(url, timeout=...)``; this
            script passes an ``httpx.AsyncClient``.
        i: Sequence number of the request, used only in the progress message.
        url: Address to request.

    Returns:
        The response object returned by ``client.get``.

    Raises:
        Exception: Whatever was raised while fetching or printing the
            response, re-raised after the failing URL has been appended to
            ``httpxerror.txt``.
    """
    try:
        # The proxy cannot be selected here: passing a proxy keyword to
        # ``get()`` raises TypeError, so it has to be set on the client.
        res = await client.get(url, timeout=20)
        print(f'第{i}次请求,status_code = {res.status_code}')
        print(res.request.url)
        print(res.text)
        return res
    except Exception as e:
        print('error occurred while scraping ', url)
        with open('httpxerror.txt', mode='a', encoding='utf-8') as f:
            f.write(time.strftime('%Y-%m-%d %H:%M:%S ') + url + '\n')
        # Show the actual error instance, not the ``Exception`` class.
        print(repr(e))
        # Yield to the event loop while backing off; ``time.sleep`` would
        # freeze every other in-flight request for the full second.
        await asyncio.sleep(1)
        raise
async def main():
    """Crawl the paginated book API through one proxy taken from the pool.

    A proxy address is read from the local proxy-pool service, an
    ``httpx.AsyncClient`` is built with it, and one ``req`` task is scheduled
    per page. All tasks share that client and therefore the same proxy.

    NOTE(review): httpx appears to bind proxies to the client rather than to
    individual requests (a per-request proxy keyword raises TypeError), so
    rotating proxies would mean building a new ``AsyncClient`` for each
    proxy -- confirm against the httpx documentation.

    Returns:
        The list of responses produced by the ``req`` tasks, in page order.

    Raises:
        Exception: The first error raised by any ``req`` task, or by the
            proxy-pool lookup.
    """
    PAGE_SIZE = 18
    PAGE_NUMBER = 1
    INDEX_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
    proxypool_url = 'http://127.0.0.1:5555/random'

    # Bound the lookup: without a timeout a stalled proxy pool would hang
    # the whole script before any request is sent.
    proxy = requests.get(proxypool_url, timeout=10).text.strip()
    print(proxy)
    proxies = 'http://' + proxy
    print(proxies)

    limits = httpx.Limits(max_keepalive_connections=5, max_connections=10)
    task_list = []  # one scheduled task per page
    async with httpx.AsyncClient(limits=limits, proxies=proxies) as client:
        for page in range(1, PAGE_NUMBER + 1):
            urllink = INDEX_URL.format(offset=PAGE_SIZE * (page - 1))
            print(urllink)
            task_list.append(asyncio.create_task(req(client, page, urllink)))
        # Wait inside the ``async with`` so the client stays open until
        # every request has finished.
        return await asyncio.gather(*task_list)

# Script entry point: run the crawl once and report the elapsed wall time.
# The guard must compare ``__name__`` with ``'__main__'``; the bare
# ``name == 'main'`` form raises NameError as soon as the file is run.
if __name__ == '__main__':
    start = time.time()
    asyncio.run(main())
    end = time.time()
    print(f'time:{end - start}')

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions