Skip to content

Commit

Permalink
some docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
mxsnq committed Oct 9, 2019
1 parent e214ea7 commit 19ad819
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 7 deletions.
10 changes: 10 additions & 0 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@


class PuppeteerServiceDownloaderMiddleware:
"""
This downloader middleware converts PuppeteerRequest instances into
Puppeteer service API requests and then converts the service's responses into
PuppeteerResponse instances. Additionally, it tracks all browser contexts
that the spider uses and sends a cleanup request to the service once the
spider is closed.
The Puppeteer service URL may be set via the PUPPETEER_SERVICE_URL setting.
"""

def __init__(self, crawler: Crawler, service_url: str):
self.service_base_url = service_url
self.crawler = crawler
Expand Down
32 changes: 28 additions & 4 deletions scrapypuppeteer/request.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,36 @@
from scrapy.http import Request
from typing import Union

from scrapypuppeteer.actions import GoTo
from scrapy.http import Request, Response

from scrapypuppeteer.actions import GoTo, PuppeteerServiceAction


class PuppeteerRequest(Request):
def __init__(self, action, context_id=None, page_id=None,
close_page=True, response=None,
"""
Request to be executed in browser with puppeteer.
"""

def __init__(self,
action: Union[str, PuppeteerServiceAction],
context_id: str = None,
page_id: str = None,
close_page: bool = True,
response: Response = None,
**kwargs):
"""
:param action: URL or browser action
:param context_id: puppeteer browser context id; if None (default),
new incognito context will be created
:param page_id: puppeteer browser page id; if None (default), new
page will be opened in given context
:param close_page: whether to close page after request completion;
set to False, if you want to continue interacting
with the page
:param response: a response which this request follows; if target page URL
can't be inferred from action, it is set to response.url
:param kwargs:
"""
if isinstance(action, str):
url = action
options = kwargs.pop('options', None)
Expand Down
35 changes: 32 additions & 3 deletions scrapypuppeteer/response.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,36 @@
from typing import Union
from urllib.parse import urljoin

from scrapy.http import Response, TextResponse

from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import GoTo
from scrapypuppeteer.actions import GoTo, PuppeteerServiceAction


class PuppeteerResponse(Response):
def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):
def __init__(self,
url: str,
puppeteer_request: PuppeteerRequest,
context_id: str,
page_id: str,
**kwargs):
self.puppeteer_request = puppeteer_request
self.context_id = context_id
self.page_id = page_id
super().__init__(url, **kwargs)

def follow(self, action, close_page=True, **kwargs):
def follow(self,
action: Union[str, PuppeteerServiceAction],
close_page=True,
**kwargs) -> PuppeteerRequest:
"""
Execute action in same browser page.
:param action: URL (may be relative) or browser action.
:param close_page: whether to close page after request completion
:param kwargs:
:return:
"""
page_id = None if self.puppeteer_request.close_page else self.page_id
if isinstance(action, str):
action = urljoin(self.url, action)
Expand All @@ -24,6 +41,10 @@ def follow(self, action, close_page=True, **kwargs):


class PuppeteerHtmlResponse(PuppeteerResponse, TextResponse):
"""
A scrapy.TextResponse capturing the state of a page in the browser.
Additionally exposes the received html and cookies via the corresponding attributes.
"""
def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):
self.html = kwargs.pop('html')
self.cookies = kwargs.pop('cookies')
Expand All @@ -33,12 +54,20 @@ def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):


class PuppeteerJsonResponse(PuppeteerResponse):
    """
    Response produced for a CustomJsAction.

    Every extra keyword argument given to the constructor is kept in
    ``self.data``; the same keyword arguments are also forwarded to
    ``PuppeteerResponse``.
    """

    def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):
        """
        :param url: URL passed on to PuppeteerResponse
        :param puppeteer_request: request passed on to PuppeteerResponse
        :param context_id: browser context id passed on to PuppeteerResponse
        :param page_id: browser page id passed on to PuppeteerResponse
        :param kwargs: stored as ``self.data`` and forwarded to the parent
        """
        # Keep a reference to the very same dict object, not a copy.
        self.data = kwargs
        super(PuppeteerJsonResponse, self).__init__(
            url,
            puppeteer_request,
            context_id,
            page_id,
            **kwargs
        )


class PuppeteerScreenshotResponse(PuppeteerResponse):
    """
    Response for Screenshot action.
    Screenshot is available via self.screenshot as base64 encoded string.
    """

    def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):
        """
        :param url: URL passed on to PuppeteerResponse
        :param puppeteer_request: request passed on to PuppeteerResponse
        :param context_id: browser context id passed on to PuppeteerResponse
        :param page_id: browser page id passed on to PuppeteerResponse
        :param kwargs: may contain ``screenshot``; the remaining items are
                       forwarded to PuppeteerResponse
        """
        # Remove the key instead of merely reading it, so that it is not
        # forwarded to the base constructor together with the rest of kwargs
        # (PuppeteerHtmlResponse handles its extra keys the same way).
        # The None default keeps ``screenshot`` optional, as before.
        self.screenshot = kwargs.pop('screenshot', None)
        super().__init__(url, puppeteer_request, context_id, page_id, **kwargs)

0 comments on commit 19ad819

Please sign in to comment.