Happy Scraping! 🚀

mertbekci · mertbekci · commit 5a81c36c50ba · 2023-08-08T13:00:45.000+03:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/README.md b/README.md
@@ -0,0 +1,63 @@
+# Scrape.do python-sample
+
+<p align="center" style="background-color:white;" >
+    <img src="https://scrape.do/images/scrape-logo.svg" alt="scrape.do" width="200">
+</p>
+
+Web Scraper API
+
+- Best Rotating Proxy & [Scraping API](https://scrape.do/) Alternative.
+- :warning: Please read the [documents](https://scrape.do/docs/getting-started) first.
+
+#### You can send requests to any web page through the proxy gateway and web API provided by scrape.do. As the example shows, this takes only a few lines of code.
+
+#### You can see an example here ([test.py](/test.py))
+
+## Usage
+
+First, import the `python_sample` class from the [python-sample](/python_sample_scrape_do/__init__.py) package and create an instance of it.
+
+Then use that `python_sample` object to call the scrape.do APIs:
+
+    import traceback
+
+    from python_sample_scrape_do import Scrape_do_Exception, python_sample
+
+    API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"
+
+    # create a python-sample object
+    sample = python_sample()
+
+    # set the scrape.do api key
+    sample.set_api_token(api_token=API_TOKEN)
+
+    # Get Scrape.do account statistics
+    try:
+        resp = sample.account_status()
+        print("Response Type " + str(type(resp)))
+        print(resp)
+    except ConnectionError as e:
+        print(str(e))
+        print(traceback.format_exc())
+
+    except Scrape_do_Exception as e:
+        print(str(e))
+        print(traceback.format_exc())
+
+    try:
+        resp = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
+                                         render=False, super_proxies=False, geo_code=None)
+        print(resp)
+    except ConnectionError as e:
+        print(str(e))
+        print(traceback.format_exc())
+
+    except Scrape_do_Exception as e:
+        print(str(e))
+        print(traceback.format_exc())
+
+### Screenshot
+
+<br/>
+
+![python-sample](python_sample.png)
+
+<br/><br/>
diff --git a/python_sample.png b/python_sample.png
diff --git a/python_sample_scrape_do/__init__.py b/python_sample_scrape_do/__init__.py
@@ -0,0 +1,169 @@
+"""
+    Date: 29-08-2020
+    Created by Sameer Narkhede
+    Project : python_sample
+    Module : python_sample_scrape_do
+"""
+import traceback
+import requests
+
+
+class Scrape_do_Exception(BaseException):
+    """
+    custom Scrape.do exception class
+    """
+    pass
+
+
+class python_sample:
+    """
+    Python sample class for proxy rotating api's https://scrape.do
+    """
+
+    def __init__(self):
+        self.scrape_do_api_token = None
+
+    def set_api_token(self, api_token=None):
+        """
+        set scrape.do api token you can find this token from https://scrape.do/dashboard this needs login.
+        :param api_token: String API_TOKEN from https://scrape.do
+        :return: None
+        """
+        self.scrape_do_api_token = api_token
+
+    def account_status(self):
+        """
+        returns the statistics of your scrape.do account
+        :return: Dictionary of statistics
+        """
+        if self.scrape_do_api_token:
+
+            response = requests.get("http://api.scrape.do/info?token=" + self.scrape_do_api_token)
+
+            return response.json()
+        else:
+            raise Scrape_do_Exception("api-token is not configured")
+
+    def create_request_url(self, url, method="GET", payload=None, headers=None, render=False,
+                           super_proxies=False, geo_code=None):
+        """
+        Best Rotating Proxy & Scraping API Alternative https://scrape.do/ api handler
+        new request url
+
+        :param url: String the url user needs to scrape. Ex. 'https://httpbin.org/get'
+
+        :param method: String method for the url request. Ex. ``GET``, ``OPTIONS``, ``HEAD``, ``POST``, ``PUT``,
+                        ``PATCH``, or ``DELETE``
+
+        :param payload: (optional) Dictionary, list of tuples, bytes, or file-like object to send in the body of the
+
+        :param headers: (optional) Dictionary of HTTP Headers to send with the request
+
+        :param render: (optional) Boolean - To use Javascript, all you need to do is setting render parameter to true
+                        ** Beware that you need a business plan to use this feature!
+
+        :param super_proxies:(optional) Boolean - To use Super Proxies, all you need to do is setting super parameter
+                            to true
+                        ** Beware that you need a business plan to use this feature!
+        :param geo_code: geocode in 'us', 'gb', 'ca', 'tr', 'cn', 'ru', 'se', 'de', 'fr', 'es', 'br' ex. us
+                        ** Beware that you need a Pro plan to use this feature!
+
+        :return: response of scrape.do api
+
+        """
+
+        # check if there is token is configured
+        if self.scrape_do_api_token:
+            base_url = "http://api.scrape.do"
+
+            params = {'token': self.scrape_do_api_token}
+
+            if headers is None:
+                headers = {}
+
+            if payload is None:
+                payload = {}
+
+            if headers is not None and headers is not {}:
+                params['customHeaders'] = 'true'
+
+            params['url'] = url
+
+            if render:
+                params['render'] = 'true' if render else 'false'
+
+            if super_proxies:
+                params['super'] = 'true' if super_proxies else 'false'
+
+            if geo_code:
+                geocodes = ['us', 'gb', 'ca', 'tr', 'cn', 'ru', 'se', 'de', 'fr', 'es', 'br']
+
+                if geo_code not in geocodes:
+                    raise Scrape_do_Exception(
+                        "Geo-Code is not valid. please provide geo-code in " + str(geocodes))
+
+                params['geo_code'] = geo_code
+
+            methods = ["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
+            if method not in methods:
+                raise Scrape_do_Exception("method is not valid. please provide method in " + str(methods))
+
+            response = requests.request(method, base_url, params=params, headers=headers, data=payload)
+
+            print("status_code:" + str(response.status_code))
+
+            if response.status_code == 200:
+                return response.text.encode('utf8')
+
+            elif response.status_code == 404:
+                raise Scrape_do_Exception("Target url not found :: Pass valid URL")
+
+            elif response.status_code == 429:
+                raise Scrape_do_Exception("You are sending too many concurrent request :: Please upgrade your "
+                                          "plan or contact with sale.")
+
+            elif response.status_code == 401:
+                raise Scrape_do_Exception("You have not credit :: Please upgrade your plan or contact with sale.")
+
+            elif response.status_code == 502:
+                raise Scrape_do_Exception("Gateway Error :: Please retry and check response. If you live "
+                                          "constantly, contact support@scrape.do")
+
+        else:
+            raise Scrape_do_Exception("api-token is not configured")
+
+
+if __name__ == '__main__':
+
+    API_TOKEN = "Your_API_TOKEN_FOR_scrape.do"
+
+    # create an python-sample object
+    sample = python_sample()
+
+    # set the scrape.do api key
+    # sample.set_api_token(api_token=API_TOKEN)
+
+    # Get Scrape.do account statistics
+    try:
+        resp = sample.account_status()
+        print("Response Type " + str(type(resp)))
+        print(resp)
+    except ConnectionError as e:
+        print(str(e))
+        print(traceback.format_exc())
+
+    except Scrape_do_Exception as e:
+        print(str(e))
+        print(traceback.format_exc())
+
+    try:
+        resp = sample.create_request_url(url='https://docs.scrape.do/', method="GET", payload={}, headers={},
+                                         render=False, super_proxies=False, geo_code=None)
+        print(resp)
+    except ConnectionError as e:
+        print(str(e))
+        print(traceback.format_exc())
+
+    except Scrape_do_Exception as e:
+        print(str(e))
+        print(traceback.format_exc())
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+requests
diff --git a/setup.py b/setup.py
@@ -0,0 +1,12 @@
+from setuptools import setup
+
+setup(
+    name='python_sample_scrape_do',
+    version='1.0',
+    packages=['python_sample_scrape_do'],
+    url='',
+    license='',
+    author='Sameer Narkhede',
+    author_email='narkhedesam@gmail.com',
+    description='Scrape.do python sample repository you can directly copy and paste to your project'
+)
diff --git a/test.py b/test.py