-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
70 lines (62 loc) · 2.58 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from flask import Flask, jsonify
import requests
import logging
from lxml import html
from werkzeug.exceptions import NotFound, InternalServerError
def init_app():
    """Create and configure the Flask application.

    Configures INFO-level logging, registers the ``/reviews/<path>`` route
    that scrapes a LendingTree review page, and installs a catch-all error
    handler that reports failures as JSON.

    Returns:
        The configured ``Flask`` application instance.
    """
    application = Flask(__name__)
    logging.basicConfig(level=logging.INFO)

    # GET method to parse LendingTree review pages.
    # Returns: a JSON object whose "response" key holds a list of reviews
    # (title, text, reviewer name, review date and recommendation count).
    # Else: raises an error, which error_handler turns into a JSON response
    # so the request exits gracefully.
    @application.route('/reviews/<path:path>', methods=['GET'])
    def get_reviews_list(path):
        """Fetch the review page for *path* and return its reviews as JSON.

        Raises:
            NotFound: if the upstream page does not answer with HTTP 200.
        """
        logging.info("***Started parsing reviews list***")
        uri = "https://www.lendingtree.com/reviews/" + path
        # Always bound the upstream call: without a timeout, requests waits
        # indefinitely on a stalled connection and ties up this worker.
        response = requests.get(uri, timeout=10)
        if response.status_code != 200 or response.content is None:
            raise NotFound
        reviews = build_reviews_data(response.content)
        logging.info("***Finished parsing reviews list***")
        # jsonify keeps the success path consistent with error_handler and
        # does not rely on Flask's automatic dict-to-JSON conversion.
        return jsonify({"response": reviews})

    # Convert HTML content into a tree structure and query it with XPath
    # to get the specific lists of items.
    # The parsing process depends heavily on the HTML structure of the page.
    def build_reviews_data(content):
        """Extract the review fields from raw HTML *content*.

        Returns:
            A list of dicts, one per review, with the keys
            ``review_titles``, ``review_texts``, ``consumer_names``,
            ``consumer_review_dates`` and ``num_recs``.
        """
        tree = html.fromstring(content)
        review_titles = tree.xpath('//p[@class="reviewTitle"]/text()')
        review_texts = tree.xpath('//p[@class="reviewText"]/text()')
        consumer_names = tree.xpath('//p[@class="consumerName"]/text()')
        consumer_review_dates = tree.xpath(
            '//p[@class="consumerReviewDate"]/text()')
        num_recs = tree.xpath('//div[@class="numRec"]/text()')

        # NOTE: zip() pairs the i-th match of every query and stops at the
        # shortest list, so a review that lacks one of these fields will
        # shift or drop the entries that follow it.
        return [
            {
                'review_titles': title,
                'review_texts': text,
                'consumer_names': name.strip(),
                'consumer_review_dates': review_date,
                'num_recs': num_rec,
            }
            for title, text, name, review_date, num_rec in zip(
                review_titles, review_texts, consumer_names,
                consumer_review_dates, num_recs)
        ]

    @application.errorhandler(Exception)
    def error_handler(e):
        """Log *e* and return its message as a JSON response.

        The response status is the exception's own integer ``code`` when it
        has one, and 500 otherwise.
        """
        # This handler receives every Exception, not only werkzeug HTTP
        # errors, so ``e.code`` may be missing (or not a status code at all).
        # Fall back to 500 instead of failing inside the handler itself.
        code = getattr(e, 'code', None)
        if not isinstance(code, int):
            code = 500

        if isinstance(e, NotFound):
            logging.error("Unable to find the page: %s", e.description)
        elif isinstance(e, InternalServerError):
            logging.error("Unable to connect to server.")
        else:
            # Attach the traceback so unexpected failures can be diagnosed.
            logging.error("An exception has occurred.", exc_info=e)

        resp = jsonify(str(e))
        resp.status_code = code
        return resp

    return application
if __name__ == '__main__':
    # Script entry point: build the application and serve it on port 9001.
    app = init_app()
    # Flask documents ``port`` as an int, so pass a number, not a string.
    app.run(port=9001)