-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
75837c0
commit 1a55ee7
Showing
7 changed files
with
150 additions
and
135 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# 1. Headline | ||
# 1. Headline | ||
~Not Up to date~ | ||
|
||
### Ideas for executing a headline rating | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,55 +1,63 @@ | ||
import urllib.request | ||
from bs4 import BeautifulSoup | ||
from textblob.classifiers import NaiveBayesClassifier | ||
from textblob import TextBlob | ||
import newspaper | ||
import nltk | ||
from nltk.classify import NaiveBayesClassifier | ||
from nltk.classify.util import accuracy | ||
|
||
class title:
    """Fetch a news article's headline and label its sentiment.

    The article is downloaded and parsed with ``newspaper.Article``; the
    headline is then classified by a ``NaiveBayesClassifier`` trained on
    ``positive_headlines.csv`` and ``negative_headlines.csv`` (one headline
    per line, read from the current working directory).
    """

    def __init__(self, news_url=None):
        """Store the article URL and initialise empty training sets.

        Args:
            news_url: URL of the article to analyse. When omitted, the user
                is prompted for it on stdin, as before.
        """
        if news_url is None:
            news_url = input("\nEnter The URL : ")
        self.news_url = news_url
        self.pos = []  # [featureset, 'Positive'] pairs from positive_headlines.csv
        self.neg = []  # [featureset, 'Negative'] pairs from negative_headlines.csv

    def extract_headline(self):
        """Download and parse the article, then return its stripped title.

        On ``newspaper.article.ArticleException`` an error is printed and a
        placeholder title is returned instead of raising.
        """
        # Created outside the try so the handler below always has an article
        # object to put the placeholder title on.
        self.article = newspaper.Article(self.news_url)
        try:
            self.article.download()
            self.article.parse()
        except newspaper.article.ArticleException:
            print("\nCONNECTION/URL ERROR: There may be a problem with your connection or the URL entered may be invalid")
            # Was `article.title = ...`, which raised NameError in the handler.
            self.article.title = "Invalid URL/Could not extract title"
        return self.article.title.strip()

    @staticmethod
    def _features(text):
        """Return the bag-of-words featureset ``{token: True}`` for *text*."""
        return {word: True for word in nltk.word_tokenize(text)}

    @classmethod
    def _load_featuresets(cls, path, label):
        """Read one headline per line from *path* and pair each with *label*."""
        with open(path) as file:
            # Blank lines would yield empty featuresets that only distort the
            # label counts, so they are skipped.
            return [[cls._features(line), label] for line in file if line.strip()]

    def train(self, headline):
        """Train on the two headline files and classify *headline*.

        Returns:
            The label chosen by the classifier ('Positive' or 'Negative').
        """
        # Rebuilt on every call so repeated calls do not append the same
        # rows to the training sets again.
        self.pos = self._load_featuresets("positive_headlines.csv", 'Positive')
        self.neg = self._load_featuresets("negative_headlines.csv", 'Negative')

        classifier = NaiveBayesClassifier.train(self.pos + self.neg)
        return classifier.classify(self._features(headline))

    def train_data(self, headline):
        """Backward-compatible alias for :meth:`train`."""
        return self.train(headline)

    def headline_category(self, headline, sentiment):
        """Print the headline, its sentiment and the article's authors.

        Reads ``self.article.authors``, so :meth:`extract_headline` must have
        run first.
        """
        print("\nHEADLINE :", headline.upper())
        print("SENTIMENT :", sentiment)
        print("AUTHOR(S) :", *self.article.authors, '\n')

    def main(self):
        """Run the pipeline once: extract, classify, report."""
        hdln = self.extract_headline()
        # Classify once; the extra calls retrained the model and discarded
        # the result.
        sntmnt = self.train(hdln)
        self.headline_category(hdln, sntmnt)
|
||
|
||
|
||
# Script entry point: build the analyser (which prompts for a URL) and run
# the pipeline a single time.
if __name__=='__main__':
    do_ya_thing=title()
    do_ya_thing.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
aba decides against community broadcasting licence | ||
act fire witnesses must be aware of defamation | ||
air nz staff in aust strike for pay rise | ||
air nz strike to affect australian travellers | ||
aussie qualifier stosur wastes four memphis match | ||
australia is locked into war timetable opp | ||
blizzard buries united states in bills | ||
brigadier dismisses reports troops harassed in | ||
british combat troops arriving daily in kuwait | ||
bryant leads lakers to double overtime win | ||
bushfire victims urged to see centrelink | ||
businesses should prepare for terrorist attacks | ||
carews freak goal leaves roma in ruins | ||
cemeteries miss out on funds | ||
council chief executive fails to secure position | ||
crean tells alp leadership critics to shut up | ||
dargo fire threat expected to rise | ||
death toll continues to climb in south korean subway | ||
direct anger at govt not soldiers crean urges | ||
dispute over at smithton vegetable processing plant | ||
dying korean subway passengers phoned for help | ||
firefighters contain acid spill | ||
four injured in head on highway crash | ||
gilchrist backs rest policy | ||
girl injured in head on highway crash | ||
govt is to blame for ethanols unpopularity opp | ||
griffiths under fire over project knock back | ||
hacker gains access to eight million credit cards | ||
hanson should go back where she came from nsw mp | ||
harrington raring to go after break | ||
investigation underway into elster creek spill | ||
iraqs neighbours plead for continued un inspections | ||
israeli forces push into gaza strip | ||
kelly not surprised ethanol confidence low | ||
korean subway fire 314 still missing | ||
low demand forces air service cuts | ||
man with knife hijacks light plane | ||
more than 40 pc of young men drink alcohol at | ||
more water restrictions predicted for northern tas | ||
petrol bombs and water cannons mark violent escalation in hong kong protests | ||
imran khan addresses pakistan on kashmir threatens nuclear war once again | ||
FIR against NCP leader Ajit Pawar 69 others in Maharashtra co-op bank scam case |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
ag calls for infrastructure protection summit | ||
ambitious olsson wins triple jump | ||
antic delighted with record breaking barca | ||
aust addresses un security council over iraq | ||
australia to contribute 10 million in aid to iraq | ||
barca take record as robson celebrates birthday in | ||
bathhouse plans move ahead | ||
big hopes for launceston cycling championship | ||
big plan to boost paroo water supplies | ||
commonwealth bank cuts fixed home loan rates | ||
community urged to help homeless youth | ||
councillor to contest wollongong as independent | ||
council moves to protect tas heritage garden | ||
council welcomes ambulance levy decision | ||
council welcomes insurance breakthrough | ||
dems hold plebiscite over iraqi conflict | ||
epa still trying to recover chemical clean up costs | ||
freedom records net profit for third successive | ||
funds allocated for domestic violence victims | ||
funds allocated for youth at risk | ||
funds announced for bridge work | ||
funds to go to cadell upgrade | ||
funds to help restore cossack | ||
golf club feeling smoking ban impact | ||
greens offer police station alternative | ||
hanson is grossly naive over nsw issues costa | ||
health minister backs organ and tissue storage | ||
heavy metal de posits survey nearing end | ||
investigations underway into death toll of korean | ||
iraq to pay for own rebuilding white house | ||
irish man arrested over omagh bombing | ||
irrigators vote over river management | ||
jury to consider verdict in murder case | ||
juvenile sex offenders unlikely to reoffend as | ||
last minute call hands alinghi big lead | ||
man arrested after central qld hijack attempt | ||
man charged over cooma murder | ||
man fined after aboriginal tent embassy raid | ||
man jailed over keno fraud | ||
massive drug crop discovered in western nsw | ||
mayor warns landfill protesters | ||
meeting to consider tick clearance costs | ||
meeting to focus on broken hill water woes | ||
moderate lift in wages growth | ||
Chandrayaan-2 maps lunar surface takes striking photos of craters on Moon |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
def train_classifier(self,headline):
    """Ask the user to re-label *headline* and append it to a training file.

    The user is prompted for ``p`` or ``n``. The headline is appended, on a
    new line, to ``positive_headlines.csv`` or ``negative_headlines.csv``
    respectively. Any other answer prints a notice and writes nothing.

    Args:
        headline: Headline text to add to the training data.
    """
    answer = input(
        "\nIf you think the output was incorrect, Please re-label the headline's sentiment to train the classifier\n"
        "& help improve future predictions [p/n]: "
    )

    # Map each accepted key to the file it feeds.
    targets = {'p': 'positive_headlines.csv', 'n': 'negative_headlines.csv'}

    # Tolerate stray whitespace and upper-case input ("P", "n ").
    path = targets.get(answer.strip().lower())
    if path is None:
        print("Incorrect key pressed!")
        return

    with open(path, 'a') as td:
        td.write('\n' + headline)
|
||
|
||
# NOTE(review): call-site fragment — `self` and `hdln` are not defined at this
# level; presumably this line belongs inside a method that already holds the
# extracted headline. TODO confirm where it should go.
self.train_classifier(hdln)