Commit 5a81c36 (initial commit, 0 parents)

Happy Scraping! 🚀

File tree

7 files changed: +417 -0 lines changed

.gitignore

+129 lines
```gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
```

README.md

+63 lines
# Scrape.do python-sample

<p align="center" style="background-color:white;">
  <img src="https://scrape.do/images/scrape-logo.svg" alt="scrape.do" width="200">
</p>

Web Scraper API

- Best Rotating Proxy & [Scraping API](https://scrape.do/) alternative.
- :warning: Please read the [documentation](https://scrape.do/docs/getting-started) first.

#### You can send requests to any webpage through the proxy gateway & web API provided by Scrape.do. As the example below shows, this takes only a few lines of code.

#### You can see the full example in [test.py](/test.py).

## Usage

First, add the `python_sample` module to your project as in [python-sample](/python_sample_scrape_do/__init__.py), then use the `python_sample` class to call the Scrape.do APIs:

```python
import traceback

from python_sample_scrape_do import Scrape_do_Exception, python_sample

API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"

# create a python-sample object
sample = python_sample()

# set the scrape.do api token
sample.set_api_token(api_token=API_TOKEN)

# get Scrape.do account statistics
try:
    resp = sample.account_status()
    print("Response Type " + str(type(resp)))
    print(resp)
except ConnectionError as e:
    print(str(e))
    print(traceback.format_exc())
except Scrape_do_Exception as e:
    print(str(e))
    print(traceback.format_exc())

# scrape a page through the Scrape.do gateway
try:
    resp = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
                                     render=False, super_proxies=False, geo_code=None)
    print(resp)
except ConnectionError as e:
    print(str(e))
    print(traceback.format_exc())
except Scrape_do_Exception as e:
    print(str(e))
    print(traceback.format_exc())
```
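For reference, `create_request_url` ultimately issues a single plain HTTP request to the Scrape.do gateway, passing the token and the target URL as query parameters. A minimal sketch of the equivalent direct call with `requests` (endpoint and parameter names taken from `python_sample_scrape_do/__init__.py`; the token value is a placeholder):

```python
import requests

# Direct call to the same endpoint the wrapper uses (see __init__.py).
# "Your_API_TOKEN_FOR_scrape.do" is a placeholder, not a real token.
params = {
    "token": "Your_API_TOKEN_FOR_scrape.do",  # your Scrape.do API token
    "url": "https://httpbin.org/get",         # the page to scrape
}
response = requests.get("http://api.scrape.do", params=params)
print(response.status_code)
print(response.text)
```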
### Screenshot

<br/>

![python-sample](python_sample.png)

<br/><br/>

python_sample.png

126 KB

python_sample_scrape_do/__init__.py

+169 lines
```python
"""
Date: 29-08-2020
Created by Sameer Narkhede
Project : python_sample
Module : python_sample_scrape_do
"""
import traceback

import requests


class Scrape_do_Exception(Exception):
    """
    Custom Scrape.do exception class.
    """
    pass


class python_sample:
    """
    Python sample class for the proxy-rotating APIs at https://scrape.do
    """

    def __init__(self):
        self.scrape_do_api_token = None

    def set_api_token(self, api_token=None):
        """
        Set the scrape.do API token. You can find this token at https://scrape.do/dashboard (login required).

        :param api_token: String API_TOKEN from https://scrape.do
        :return: None
        """
        self.scrape_do_api_token = api_token

    def account_status(self):
        """
        Return the statistics of your scrape.do account.

        :return: Dictionary of statistics
        """
        if self.scrape_do_api_token:
            response = requests.get("http://api.scrape.do/info?token=" + self.scrape_do_api_token)
            return response.json()
        else:
            raise Scrape_do_Exception("api-token is not configured")

    def create_request_url(self, url, method="GET", payload=None, headers=None, render=False,
                           super_proxies=False, geo_code=None):
        """
        Handler for the https://scrape.do API (Best Rotating Proxy & Scraping API alternative):
        sends a request for the given URL through the scrape.do gateway.

        :param url: String - the URL to scrape, e.g. 'https://httpbin.org/get'

        :param method: String - HTTP method for the request: ``GET``, ``OPTIONS``, ``HEAD``, ``POST``,
                       ``PUT``, ``PATCH``, or ``DELETE``

        :param payload: (optional) Dictionary, list of tuples, bytes, or file-like object to send in the
                        body of the request

        :param headers: (optional) Dictionary of HTTP headers to send with the request

        :param render: (optional) Boolean - to execute JavaScript, set the render parameter to true.
                       ** Beware that you need a Business plan to use this feature!

        :param super_proxies: (optional) Boolean - to use Super Proxies, set the super parameter to true.
                              ** Beware that you need a Business plan to use this feature!

        :param geo_code: (optional) geo code, one of 'us', 'gb', 'ca', 'tr', 'cn', 'ru', 'se', 'de',
                         'fr', 'es', 'br', e.g. 'us'.
                         ** Beware that you need a Pro plan to use this feature!

        :return: response of the scrape.do API
        """
        # check whether a token is configured
        if self.scrape_do_api_token:
            base_url = "http://api.scrape.do"

            params = {'token': self.scrape_do_api_token}

            if headers is None:
                headers = {}

            if payload is None:
                payload = {}

            # only flag custom headers when the dict is non-empty
            if headers:
                params['customHeaders'] = 'true'

            params['url'] = url

            if render:
                params['render'] = 'true'

            if super_proxies:
                params['super'] = 'true'

            if geo_code:
                geocodes = ['us', 'gb', 'ca', 'tr', 'cn', 'ru', 'se', 'de', 'fr', 'es', 'br']

                if geo_code not in geocodes:
                    raise Scrape_do_Exception(
                        "Geo-code is not valid. Please provide a geo-code in " + str(geocodes))

                params['geo_code'] = geo_code

            methods = ["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
            if method not in methods:
                raise Scrape_do_Exception("Method is not valid. Please provide a method in " + str(methods))

            response = requests.request(method, base_url, params=params, headers=headers, data=payload)

            print("status_code:" + str(response.status_code))

            if response.status_code == 200:
                return response.text.encode('utf8')

            elif response.status_code == 404:
                raise Scrape_do_Exception("Target URL not found :: Pass a valid URL")

            elif response.status_code == 429:
                raise Scrape_do_Exception("You are sending too many concurrent requests :: Please upgrade "
                                          "your plan or contact sales.")

            elif response.status_code == 401:
                raise Scrape_do_Exception("You have no credits left :: Please upgrade your plan or contact sales.")

            elif response.status_code == 502:
                raise Scrape_do_Exception("Gateway error :: Please retry and check the response. If the error "
                                          "persists, contact [email protected]")

            else:
                # surface any other status instead of silently returning None
                raise Scrape_do_Exception("Unexpected status code: " + str(response.status_code))

        else:
            raise Scrape_do_Exception("api-token is not configured")


if __name__ == '__main__':
    API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"

    # create a python-sample object
    sample = python_sample()

    # set the scrape.do api token
    sample.set_api_token(api_token=API_TOKEN)

    # get Scrape.do account statistics
    try:
        resp = sample.account_status()
        print("Response Type " + str(type(resp)))
        print(resp)
    except ConnectionError as e:
        print(str(e))
        print(traceback.format_exc())
    except Scrape_do_Exception as e:
        print(str(e))
        print(traceback.format_exc())

    # scrape a page through the Scrape.do gateway
    try:
        resp = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
                                         render=False, super_proxies=False, geo_code=None)
        print(resp)
    except ConnectionError as e:
        print(str(e))
        print(traceback.format_exc())
    except Scrape_do_Exception as e:
        print(str(e))
        print(traceback.format_exc())
```
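To show how the optional parameters of `create_request_url` map onto the gateway query string (`customHeaders`, `render`, `super`, `geo_code` in the code above), here is a hedged usage sketch; the target URL, payload, and header values are illustrative only, and `render=True` / `geo_code` require the paid plans noted in the docstring:

```python
from python_sample_scrape_do import Scrape_do_Exception, python_sample

sample = python_sample()
sample.set_api_token(api_token="Your_API_TOKEN_FOR_scrape.do")  # placeholder token

try:
    # POST through the gateway with custom headers (non-empty dict sets
    # customHeaders=true), JavaScript rendering (Business plan) and a
    # US exit node (Pro plan).
    body = sample.create_request_url(
        url="https://httpbin.org/post",          # illustrative target
        method="POST",
        payload={"query": "laptops"},            # sent as the request body
        headers={"Accept": "application/json"},  # triggers customHeaders=true
        render=True,
        super_proxies=False,
        geo_code="us",
    )
    print(body)
except Scrape_do_Exception as e:
    print("Scrape.do error:", e)
```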

requirements.txt

+1 line
```
requests
```

setup.py

+12 lines
```python
from setuptools import setup

setup(
    name='python_sample_scrape_do',
    version='1.0',
    packages=['python_sample_scrape_do'],
    url='',
    license='',
    author='Sameer Narkhede',
    author_email='[email protected]',
    description='Scrape.do python sample repository that you can copy and paste directly into your project'
)
```
