Skip to content

Commit 0bac89c

Browse files
committed
Scraper created
1 parent 54fec19 commit 0bac89c

File tree

8 files changed

+200
-9
lines changed

8 files changed

+200
-9
lines changed

main/templates/main/index.html

+57-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,60 @@
11
{% extends 'base.html' %}
2-
2+
{% load static %}
33
{% block content %}
4-
<h1>Inital setup done.</h1>
4+
<div class="container">
5+
6+
<div class="header">
7+
8+
<h1 class="text-center">Make your search</h1>
9+
10+
<form id="labnol" action="" method="GET">
11+
<div class="row">
12+
<div class="col-xs-11 col-sm-8 col-md-6 col-lg-4 mx-auto speech">
13+
<input type="text" name="item" class="form-control" id="transcript" required placeholder="Search items">
14+
<img onclick="startDictation()" src="{% static 'images/mic.gif' %}" />
15+
</div>
16+
</div>
17+
18+
<div class="row">
19+
<div class="col-md-4 mx-auto text-center">Price range (optional):</div>
20+
</div>
21+
<div class="row">
22+
<div class="col-xs-2 col-sm-4 col-md-3 col-lg-2 ml-auto pt-2">
23+
<input type="text" class="form-control" name="from" id="price-low" placeholder="From">
24+
</div>
25+
<div class="col-xs-2 col-sm-4 col-md-3 col-lg-2 mr-auto pt-2">
26+
<input type="text" class="form-control" name="to" id="price-high" placeholder="To">
27+
</div>
28+
</div>
29+
<div class="row">
30+
<div class="col-xs-11 col-sm-8 col-md-6 col-lg-4 mx-auto text-center">
31+
<button type="submit" class="btn btn-primary">Search</button>
32+
</div>
33+
</div>
34+
35+
</form>
36+
</div>
37+
<hr>
38+
{% if items %}
39+
<table>
40+
{% for item in items %}
41+
<tr>
42+
<td>
43+
<a href="{{item.link}}">{{ item.name }}</a>
44+
</td>
45+
<td>
46+
{{ item.condition }}
47+
</td>
48+
<td>
49+
{{ item.price }}
50+
</td>
51+
<td>
52+
<img src="{{ item.image }}" alt="">
53+
</td>
54+
</tr>
55+
56+
{% endfor %}
57+
</table>
58+
{% endif %}
59+
</div>
560
{% endblock %}

main/urls.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
from . import views
33

44
urlpatterns = [
5-
path('', views.index, name="index")
5+
path('', views.Index.as_view(), name="index")
66
]

main/views.py

+75-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,78 @@
11
from django.shortcuts import render
2+
from django.views.generic import ListView
3+
import requests
4+
from bs4 import BeautifulSoup
5+
from multiprocessing import cpu_count, Pool
26

7+
class Index(ListView):
    """Search page view that lists eBay results scraped for the submitted query.

    Reads ``item`` (the search terms) and the optional ``from`` / ``to``
    price bounds from the GET parameters, builds the eBay search URL, runs
    a ``Scraper`` on it and exposes the scraped rows to the template under
    the name ``items``.
    """

    # Empty default served when no search term was submitted. It is never
    # mutated by this view, so it is safe to share between requests.
    queryset = []
    context_object_name = "items"
    template_name = "main/index.html"

    def get_context_data(self, *args, **kwargs):
        """Return the template context, also exposing the results as ``queryset``."""
        context = super().get_context_data(*args, **kwargs)
        # Mirror the list rendered for *this* request instead of the
        # class-level default, which is always empty.
        context['queryset'] = context.get('object_list', self.queryset)
        return context

    def get_queryset(self):
        """Scrape eBay for the requested item and return the result rows.

        Returns:
            The list of dicts produced by ``Scraper.run()``, or the empty
            default list when the request carries no search terms.
        """
        from urllib.parse import quote_plus

        base_url = "https://www.ebay.com/sch/parser.html?_from=R40&_nkw={item}&_ipg=25"
        prices_url = "&_udlo={price_low}&_udhi={price_high}"

        terms = (self.request.GET.get('item') or '').split()
        if self.request.method != 'GET' or not terms:
            return self.queryset

        # The values come straight from the query string: percent-encode
        # them so characters such as '&' or '#' cannot alter the eBay URL.
        item = quote_plus(" ".join(terms))
        price_low = (self.request.GET.get('from') or '').strip()
        price_high = (self.request.GET.get('to') or '').strip()

        if price_low and price_high:
            url = (base_url + prices_url).format(
                item=item,
                price_low=quote_plus(price_low),
                price_high=quote_plus(price_high),
            )
        else:
            url = base_url.format(item=item)

        scraper = Scraper(base_url=url)
        # Scraper inherits the class-level ``queryset`` list from this view
        # and appends to it. Give the instance its own list so rows from
        # earlier requests are not accumulated and served again.
        scraper.queryset = []
        return scraper.run()
33+
34+
35+
class Scraper(Index):
    """Fetch an eBay search-results page and collect its item rows.

    Each parsed row is appended to ``self.queryset`` as a dict with the
    keys ``name``, ``link``, ``condition``, ``price`` and ``image``.
    Unless the caller assigns an instance attribute, ``self.queryset`` is
    the list inherited from ``Index``.
    """

    # Image URL eBay serves for thumbnails that have not been loaded yet.
    PLACEHOLDER_IMAGE = 'https://ir.ebaystatic.com/cr/v/c1/s_1x2.gif'

    def __init__(self, base_url=None):
        """Store the search-results URL that ``run()`` will fetch."""
        super().__init__()
        self.base_url = base_url

    def run(self):
        """Fetch ``base_url``, parse up to ten result rows, return ``self.queryset``.

        Scraping is best-effort: failures are printed and never propagate
        to the caller.
        """
        try:
            bs = self.make_soup(self.base_url)
        except Exception as error:
            # Boundary of the best-effort scrape: a network or parser
            # failure must not crash the calling view.
            print(error)
            return self.queryset

        if isinstance(bs, dict):
            # make_soup() returns a dict carrying an 'error' message when
            # the response status is not 200.
            print(bs.get('error'))
            return self.queryset

        for row in bs.find_all('div', class_="s-item__wrapper")[:10]:
            # Guard each row separately so one malformed row does not
            # discard all the rows that follow it.
            try:
                self.parse_rows(row)
            except Exception as error:
                print(error)
        return self.queryset

    def parse_rows(self, parser):
        """Extract one result row and append it to ``self.queryset``.

        Rows lacking a title, link or price element are skipped; a missing
        condition or image element yields an empty string for that field.

        Args:
            parser: the BeautifulSoup tag of one ``s-item__wrapper`` div.
        """
        title_tag = parser.find('h3', class_="s-item__title")
        link_tag = parser.find('a', class_="s-item__link")
        price_tag = parser.find('span', class_="s-item__price")
        if title_tag is None or link_tag is None or price_tag is None:
            return

        condition_tag = parser.find('span', class_="SECONDARY_INFO")
        image_tag = parser.find('img', class_="s-item__image-img")

        link = link_tag.get('href')
        image = image_tag.get('src', '') if image_tag is not None else ''
        if image == self.PLACEHOLDER_IMAGE and link:
            # Fall back to the placeholder when the item page gives nothing.
            image = self._detail_image(link) or image

        self.queryset.append(dict(
            name=title_tag.text,
            link=link,
            condition=condition_tag.text if condition_tag is not None else '',
            price=price_tag.text,
            image=image,
        ))

    def _detail_image(self, link):
        """Return the main image URL from an item's own page, or ``None``."""
        try:
            soup = self.make_soup(link)
        except requests.RequestException as error:
            print(error)
            return None
        if isinstance(soup, dict):
            # Non-200 response: make_soup() handed back its error dict.
            return None
        main_image = soup.find('img', {'id': "icImg"})
        return main_image.get('src') if main_image is not None else None

    def make_soup(self, url):
        """Download ``url`` and parse it with BeautifulSoup (lxml parser).

        Returns:
            The parsed ``BeautifulSoup`` document on HTTP 200, otherwise a
            dict of the form ``{'error': <message>}``.

        Raises:
            requests.RequestException: if the request itself fails or the
                15-second timeout expires.
        """
        headers = {'Accept': '*/*',
                   'Accept-Encoding': 'gzip, deflate, sdch',
                   'Accept-Language': 'en-US,en;q=0.8',
                   'content-security-policy': "media-src 'self' *.ebaystatic.com; font-src 'self' *.ebaystatic.com",
                   'Cache-Control': 'max-age=0',
                   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}

        page = requests.get(url, headers=headers, timeout=15)
        if page.status_code != 200:
            return {'error': "We got status code %s" % page.status_code}
        return BeautifulSoup(page.content, "lxml")

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
bs4==0.0.1
22
Django==2.1.7
3-
requests==2.21.0
3+
requests==2.21.0
4+
lxml==4.3.3

static/css/style.css

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
html {
3+
height: 100%;
4+
}
5+
6+
body {
7+
margin: 0;
8+
height: 100%;
9+
background: rgb(12,17,98);
10+
background: linear-gradient(0deg, rgba(12,17,98,0.8550770650056898) 0%, rgba(9,9,121,0.8130602582830007) 42%, rgba(0,212,255,0.9251050762101716) 100%);
11+
background-repeat: no-repeat;
12+
background-attachment: fixed;
13+
}
14+
15+
.speech input {
16+
height: 40px;
17+
}
18+
.speech img {
19+
position: absolute;
20+
right: 15px;
21+
top: 0;
22+
cursor: pointer;
23+
width: 40px
24+
}
25+
26+
.btn-primary {
27+
margin-top: 10px;
28+
width: 70%;
29+
-webkit-box-shadow: 0px 0px 11px 1px rgba(79,77,79,1);
30+
-moz-box-shadow: 0px 0px 11px 1px rgba(79,77,79,1);
31+
box-shadow: 0px 0px 11px 1px rgba(79,77,79,1);
32+
}
33+
34+
img {
35+
max-width: 225px;
36+
}

static/images/mic.gif

492 Bytes
Loading

static/js/main.js

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
 * Run one-shot speech recognition and write the recognised phrase into
 * the #transcript search box. Does nothing when the browser exposes no
 * speech recognition constructor.
 */
function startDictation() {
    // Prefer the standard constructor; fall back to the WebKit-prefixed one.
    var Recognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!Recognition) {
        return;
    }

    var recognition = new Recognition();
    recognition.continuous = false;
    recognition.interimResults = false;
    recognition.lang = "en-US";

    // Attach the handlers before start() so no event can be missed.
    recognition.onresult = function (e) {
        document.getElementById('transcript').value = e.results[0][0].transcript;
        recognition.stop();
    };

    recognition.onerror = function () {
        recognition.stop();
    };

    recognition.start();
}

templates/base.html

+4-2
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
<html>
55
<head>
66
<title>Django Scraper</title>
7-
<link rel="icon" href="{% static 'images/favicon.png' %}" type="image/x-icon"/>
7+
<link rel="icon" href="{% static 'images/favicon.png' %}" type="image/x-icon" />
88
<meta name="viewport" content="width=device-width, initial-scale=1" />
99
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
10-
10+
<link rel="stylesheet" href="{% static 'css/style.css' %}" />
1111
</head>
1212
<body>
1313

@@ -20,6 +20,8 @@
2020
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
2121

2222
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
23+
24+
<script src="{% static 'js/main.js' %}"></script>
2325
{% block script %}{% endblock %}
2426
</body>
2527
</html>

0 commit comments

Comments
 (0)