Skip to content

Commit

Permalink
feat: added list to print any error that occurred during search
Browse files Browse the repository at this point in the history
  • Loading branch information
iamatulsingh committed Oct 28, 2024
1 parent 8ff8506 commit f3358d1
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ jobs:
- name: Run test suite
run: |
pytest -v e2e.py
pytest -sv e2e.py
6 changes: 5 additions & 1 deletion e2e.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from pinscrape import scraper, Pinterest
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

keyword = "messi"
output_folder = "output"
Expand All @@ -23,5 +26,6 @@ def test_single_data():
def test_v2():
p = Pinterest()
images_url = p.search(keyword, images_to_download)
p.download(url_list=images_url, number_of_workers=number_of_workers, output_folder=output_folder)
print(p.errors)
assert len(images_url) == images_to_download
p.download(url_list=images_url, number_of_workers=number_of_workers, output_folder=output_folder)
21 changes: 13 additions & 8 deletions pinscrape/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

class Pinterest:
def __init__(self, user_agent: str = "", proxies: dict = None):
self.errors = []
self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.6668.71 Safari/537.36" \
if not user_agent else user_agent
self.BASE_URL = "https://www.pinterest.com"
Expand Down Expand Up @@ -201,15 +202,19 @@ def search(self, query: str, page_size=26) -> list:
image_urls = []
if response.status_code != 200:
logging.warning(f"Image search has failed!, {response.status_code}, {response.text}")
self.errors.append(f"Image search has failed!, {response.status_code}, {response.text}")
return []
try:
json_data = response.json()
results = json_data.get('resource_response', {}).get('data', {}).get('results', [])
for result in results:
image_urls.append(result['images']['orig']['url'])
self.client_context = json_data['client_context']
logging.info(f"Total {len(image_urls)} image(s) found.")
return image_urls
except requests.exceptions.JSONDecodeError as jde:
self.errors.append(jde.args)
return []

json_data = response.json()
results = json_data.get('resource_response', {}).get('data', {}).get('results', [])
for result in results:
image_urls.append(result['images']['orig']['url'])
self.client_context = json_data['client_context']
logging.info(f"Total {len(image_urls)} image(s) found.")
return image_urls


if __name__ == "__main__":
Expand Down

0 comments on commit f3358d1

Please sign in to comment.