-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReviewAnalyzerEngine.py
81 lines (61 loc) · 3.6 KB
/
ReviewAnalyzerEngine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from ReviewAnalysisStrategies.NaiveBayesReviewAnalyzer import NaiveBayesReviewAnalyzer
from ReviewAnalysisStrategies.PatternReviewAnalyzer import PatternReviewAnalyzer
from ReviewObjects.ReviewPrinter import ReviewPrinter
from ScraperStrategies.HtmlRetrievers.HtmlRetriever import HtmlRetriever
from ScraperStrategies.ScrapyReviewScraper import ScrapyReviewScraper
from ScraperStrategies.SimpleReviewScraper import SimpleReviewScraper
class ReviewAnalyzerEngine:
    """Interactive driver: collect settings, scrape reviews, analyze them, print them.

    The settings are stored as instance attributes (``review_analyzer``,
    ``review_scraper``, ``number_of_crawlers``, ``number_of_pages_to_scrape``
    and ``number_of_reviews_to_display``).  They are filled in by
    ``get_user_input()``; a caller that skips the prompts must assign them
    itself before calling ``run()``.
    """

    # Defaults used by run() when the caller does not pass its own values.
    DEFAULT_WEBSITE = "http://www.dealerrater.com/dealer/McKaig-Chevrolet-Buick-A-Dealer-For-The-People-dealer-reviews-23685/"
    DEFAULT_TARGET = "review-content"

    # Every attribute that run() reads; used to report missing configuration.
    _REQUIRED_SETTINGS = (
        "review_analyzer",
        "review_scraper",
        "number_of_crawlers",
        "number_of_pages_to_scrape",
        "number_of_reviews_to_display",
    )

    def get_user_input(self):
        """Prompt for every setting and store the answers on the instance.

        The scraper is chosen before the crawler count because the crawler
        prompt depends on which scraper was selected.
        """
        self.review_analyzer = self.get_review_analyzer_from_prompt()
        self.review_scraper = self.get_review_scraper_from_prompt()
        self.number_of_crawlers = self.get_number_of_crawlers_from_prompt()
        self.number_of_pages_to_scrape = self.get_number_of_pages_to_scrape_from_prompt()
        self.number_of_reviews_to_display = self.get_number_of_reviews_to_display_from_prompt()

    def get_review_analyzer_from_prompt(self):
        """Ask which analyzer to use and return a new instance of it."""
        # Map answers to factories so only the selected analyzer is built.
        analyzer_factories = {
            "1": PatternReviewAnalyzer,
            "2": NaiveBayesReviewAnalyzer,
        }
        prompt = (
            "Please enter the number of the type of analyzer you would like to use to score review positivity:\n\n"
            "1. Basic Pattern Analyzer\n\n"
            "2. Naive Bayes Analyzer\n"
        )
        build_analyzer = self.get_valid_dict_option_from_prompt(prompt, analyzer_factories)
        return build_analyzer()

    def get_review_scraper_from_prompt(self):
        """Ask which scraper to use and return a new instance of it."""
        # Map answers to factories so only the selected scraper is built.
        scraper_factories = {
            "1": lambda: SimpleReviewScraper(html_retriever=HtmlRetriever()),
            "2": ScrapyReviewScraper,
        }
        prompt = (
            "Please enter the number of the type of scraper you would like to use to scrape review from the web:\n\n"
            "1. Simple Web Scaper\n\n"
            "2. Scrapy Web Scraper\n"
        )
        build_scraper = self.get_valid_dict_option_from_prompt(prompt, scraper_factories)
        return build_scraper()

    def get_valid_dict_option_from_prompt(self, prompt, option_dict):
        """Re-prompt until the answer is a key of ``option_dict``; return its value.

        Args:
            prompt: Text shown to the user on every attempt.
            option_dict: Mapping from accepted answers to the values to return.

        Returns:
            The value stored under the accepted answer.

        Raises:
            ValueError: If ``option_dict`` is empty, since no answer could
                ever be accepted and the loop would never end.
        """
        if not option_dict:
            raise ValueError("option_dict must contain at least one option")
        option = ""
        while option not in option_dict:
            # Surrounding whitespace is ignored so that " 1 " selects "1".
            option = input(prompt).strip()
        return option_dict[option]

    def get_number_of_crawlers_from_prompt(self):
        """Return the crawler count: asked for with the Scrapy scraper, else 1.

        Reads ``self.review_scraper``, so the scraper must be chosen first.
        """
        if not isinstance(self.review_scraper, ScrapyReviewScraper):
            return 1
        prompt = "Please enter the number of crawlers you would like to use:\n"
        return self.get_pos_int_from_prompt(prompt)

    def get_number_of_pages_to_scrape_from_prompt(self):
        """Ask how many pages to scrape and return the answer as a positive int."""
        prompt = "Please enter the number of pages you would like to scrape reviews for:\n"
        return self.get_pos_int_from_prompt(prompt)

    def get_number_of_reviews_to_display_from_prompt(self):
        """Ask how many reviews to display and return the answer as a positive int."""
        prompt = "Please enter the number of reviews you would like to display:\n"
        return self.get_pos_int_from_prompt(prompt)

    def get_pos_int_from_prompt(self, prompt):
        """Re-prompt until the user enters a whole number greater than zero.

        Args:
            prompt: Text shown to the user on every attempt.

        Returns:
            The entered value as an ``int`` (always >= 1).
        """
        while True:
            answer = input(prompt).strip()
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as "²" that int() rejects with ValueError.
            if not answer.isdecimal():
                continue
            try:
                number = int(answer)
            except ValueError:
                # int() can still refuse the text (e.g. an extremely long
                # digit string); treat that as invalid input and ask again.
                continue
            if number > 0:
                return number

    def run(self, get_user_input, website=None, target=None):
        """Scrape, analyze and print reviews; return the analyzed review list.

        Args:
            get_user_input: When true, prompt for all settings first.  When
                false, the settings must already be set on the instance.
            website: URL handed to the scraper; defaults to ``DEFAULT_WEBSITE``.
            target: Target identifier handed to the scraper; defaults to
                ``DEFAULT_TARGET``.

        Returns:
            The value returned by the analyzer's ``analyze_reviews``.

        Raises:
            AttributeError: If any required setting is missing.
        """
        if get_user_input:
            self.get_user_input()

        # Fail up front with every missing setting named, instead of the
        # default AttributeError that only mentions the first one touched.
        missing = [name for name in self._REQUIRED_SETTINGS if not hasattr(self, name)]
        if missing:
            raise AttributeError(
                "ReviewAnalyzerEngine is not configured; missing: {}".format(", ".join(missing))
            )

        if website is None:
            website = self.DEFAULT_WEBSITE
        if target is None:
            target = self.DEFAULT_TARGET

        review_list = self.review_scraper.scrape_reviews_from_page(
            website,
            target,
            self.number_of_pages_to_scrape,
            self.number_of_crawlers,
        )
        review_list = self.review_analyzer.analyze_reviews(review_list)
        review_printer = ReviewPrinter()
        review_printer.print_review_list(review_list, self.number_of_reviews_to_display)
        return review_list
if __name__ == "__main__":
    # Script entry point: build an engine and run it with interactive prompts enabled.
    ReviewAnalyzerEngine().run(get_user_input=True)