From 1a55ee7443a0fece40f08733adfdfd1c05739ba5 Mon Sep 17 00:00:00 2001 From: Dead Teddy Date: Fri, 30 Aug 2019 12:02:48 +0530 Subject: [PATCH] Implemented NLTK --- Idea/1. Headline.md | 3 +- ReadMe.md | 3 + modules/headline.py | 74 +++++++++++++----------- modules/negative_headlines.csv | 42 ++++++++++++++ modules/positive_headlines.csv | 45 +++++++++++++++ modules/training_data.csv | 101 --------------------------------- test/test.py | 17 ++++++ 7 files changed, 150 insertions(+), 135 deletions(-) create mode 100644 modules/negative_headlines.csv create mode 100644 modules/positive_headlines.csv delete mode 100644 modules/training_data.csv create mode 100644 test/test.py diff --git a/Idea/1. Headline.md b/Idea/1. Headline.md index 329e2f6..28a1dea 100644 --- a/Idea/1. Headline.md +++ b/Idea/1. Headline.md @@ -1,4 +1,5 @@ -# 1. Headline +# 1. Headline +~Not Up to date~ ### Ideas for executing a headline rating diff --git a/ReadMe.md b/ReadMe.md index 219402d..e3b2259 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -18,4 +18,7 @@ The program aims to eliminate the prevailing dominance of Fake News all around t * AlphaPhiKappa * riseandshine0 +* mphirke +* mandjevant * webdotorg + diff --git a/modules/headline.py b/modules/headline.py index 8cbe76d..f486652 100644 --- a/modules/headline.py +++ b/modules/headline.py @@ -1,55 +1,63 @@ -import urllib.request -from bs4 import BeautifulSoup -from textblob.classifiers import NaiveBayesClassifier -from textblob import TextBlob +import newspaper +import nltk +from nltk.classify import NaiveBayesClassifier +from nltk.classify.util import accuracy class title: - #Initialisations + + def __init__(self): - self.news_url="https://edition.cnn.com/2019/08/25/politics/trump-g7-boris-johnson-emmanuel-macron/index.html" + self.news_url=input("\nEnter The URL : ") + self.pos=[] #Variable to store all positive tokens from positive_headlines.csv file + self.neg=[] #Variable to store all negative tokens from negative_headlines.csv file def extract_headline(self): - self.net_con=True #Expecting Internet Connection to be working initially + try: - news_page=urllib.request.urlopen(self.news_url) - soup = BeautifulSoup(news_page,'html.parser') - headline_in_html=soup.find('h1') - headline=headline_in_html.text.strip() - return headline + self.article = newspaper.Article(self.news_url) + self.article.download() + self.article.parse() + + except newspaper.article.ArticleException: #List possible errors in case of any exception + print("\nCONNECTION/URL ERROR: There may be a problem with your connection or the URL entered may be invalid") + article.title = "Invalid URL/Could not extract title" - except urllib.error.URLError: - print("\nCONNECTIION ERROR:There may be a connection problem. Please check if the device is connected to the Internet") - self.net_con=False #Value update if the program is unable to connenct + return self.article.title.strip() - #Adding Training Data - def train_data(self, headline): - try: - with open('training_data.csv','r') as td: - cl=NaiveBayesClassifier(td,format='csv') - sentiment=cl.classify(headline) - return sentiment + #Adding Training/Testing Data + def train(self,headline): - except: - if self.net_con==False: - pass - else: - print("\n\nProgram Error") + with open("positive_headlines.csv") as file: + for sentence in file: + self.pos.append([{word: True for word in nltk.word_tokenize(sentence)},'Positive']) + + with open("negative_headlines.csv") as file: + for sentence in file: + self.neg.append([{word: True for word in nltk.word_tokenize(sentence)},'Negative']) + + training=self.pos[:int(len(self.pos))] + self.neg[:int(len(self.neg))] + + classifier = NaiveBayesClassifier.train(training) #Training + sentiment=classifier.classify({word: True for word in nltk.word_tokenize(headline)}) + return sentiment def headline_category(self,headline,sentiment): + print("\nHEADLINE :",headline.upper()) + print("SENTIMENT :",sentiment) + print("AUTHOR(S) :",*self.article.authors,'\n') - analyse_headline=TextBlob(headline) - print("\n"+"Headline:",headline,"\n") - print("Headline Sentiment:",sentiment,"\n\n") def main(self): hdln=self.extract_headline() - sntmnt=self.train_data(hdln) + sntmnt=self.train(hdln) + self.train(hdln) self.headline_category(hdln,sntmnt) - + + if __name__=='__main__': do_ya_thing=title() - do_ya_thing.main() + do_ya_thing.main() \ No newline at end of file diff --git a/modules/negative_headlines.csv b/modules/negative_headlines.csv new file mode 100644 index 0000000..1abad8a --- /dev/null +++ b/modules/negative_headlines.csv @@ -0,0 +1,42 @@ +aba decides against community broadcasting licence +act fire witnesses must be aware of defamation +air nz staff in aust strike for pay rise +air nz strike to affect australian travellers +aussie qualifier stosur wastes four memphis match +australia is locked into war timetable opp +blizzard buries united states in bills +brigadier dismisses reports troops harassed in +british combat troops arriving daily in kuwait +bryant leads lakers to double overtime win +bushfire victims urged to see centrelink +businesses should prepare for terrorist attacks +carews freak goal leaves roma in ruins +cemeteries miss out on funds +council chief executive fails to secure position +crean tells alp leadership critics to shut up +dargo fire threat expected to rise +death toll continues to climb in south korean subway +direct anger at govt not soldiers crean urges +dispute over at smithton vegetable processing plant +dying korean subway passengers phoned for help +firefighters contain acid spill +four injured in head on highway crash +gilchrist backs rest policy +girl injured in head on highway crash +govt is to blame for ethanols unpopularity opp +griffiths under fire over project knock back +hacker gains access to eight million credit cards +hanson should go back where she came from nsw mp +harrington raring to go after break +investigation underway into elster creek spill +iraqs neighbours plead for continued un inspections +israeli forces push into gaza strip +kelly not surprised ethanol confidence low +korean subway fire 314 still missing +low demand forces air service cuts +man with knife hijacks light plane +more than 40 pc of young men drink alcohol at +more water restrictions predicted for northern tas +petrol bombs and water cannons mark violent escalation in hong kong protests +imran khan addresses pakistan on kashmir threatens nuclear war once again +FIR against NCP leader Ajit Pawar 69 others in Maharashtra co-op bank scam case \ No newline at end of file diff --git a/modules/positive_headlines.csv b/modules/positive_headlines.csv new file mode 100644 index 0000000..d12505a --- /dev/null +++ b/modules/positive_headlines.csv @@ -0,0 +1,45 @@ +ag calls for infrastructure protection summit +ambitious olsson wins triple jump +antic delighted with record breaking barca +aust addresses un security council over iraq +australia to contribute 10 million in aid to iraq +barca take record as robson celebrates birthday in +bathhouse plans move ahead +big hopes for launceston cycling championship +big plan to boost paroo water supplies +commonwealth bank cuts fixed home loan rates +community urged to help homeless youth +councillor to contest wollongong as independent +council moves to protect tas heritage garden +council welcomes ambulance levy decision +council welcomes insurance breakthrough +dems hold plebiscite over iraqi conflict +epa still trying to recover chemical clean up costs +freedom records net profit for third successive +funds allocated for domestic violence victims +funds allocated for youth at risk +funds announced for bridge work +funds to go to cadell upgrade +funds to help restore cossack +golf club feeling smoking ban impact +greens offer police station alternative +hanson is grossly naive over nsw issues costa +health minister backs organ and tissue storage +heavy metal de posits survey nearing end +investigations underway into death toll of korean +iraq to pay for own rebuilding white house +irish man arrested over omagh bombing +irrigators vote over river management +jury to consider verdict in murder case +juvenile sex offenders unlikely to reoffend as +last minute call hands alinghi big lead +man arrested after central qld hijack attempt +man charged over cooma murder +man fined after aboriginal tent embassy raid +man jailed over keno fraud +massive drug crop discovered in western nsw +mayor warns landfill protesters +meeting to consider tick clearance costs +meeting to focus on broken hill water woes +moderate lift in wages growth +Chandrayaan-2 maps lunar surface takes striking photos of craters on Moon \ No newline at end of file diff --git a/modules/training_data.csv b/modules/training_data.csv deleted file mode 100644 index 8c53f71..0000000 --- a/modules/training_data.csv +++ /dev/null @@ -1,101 +0,0 @@ -aba decides against community broadcasting licence,Negative -act fire witnesses must be aware of defamation,Negative -ag calls for infrastructure protection summit,Positive -air nz staff in aust strike for pay rise,Negative -air nz strike to affect australian travellers,Negative -ambitious olsson wins triple jump,Positive -antic delighted with record breaking barca,Positive -aussie qualifier stosur wastes four memphis match,Negative -aust addresses un security council over iraq,Positive -australia is locked into war timetable opp,Negative -australia to contribute 10 million in aid to iraq,Positive -barca take record as robson celebrates birthday in,Positive -bathhouse plans move ahead,Positive -big hopes for launceston cycling championship,Positive -big plan to boost paroo water supplies,Positive -blizzard buries united states in bills,Negative -brigadier dismisses reports troops harassed in,Negative -british combat troops arriving daily in kuwait,Negative -bryant leads lakers to double overtime win,Negative -bushfire victims urged to see centrelink,Negative -businesses should prepare for terrorist attacks,Negative -calleri avenges final defeat to eliminate massu,Neutral -call for ethanol blend fuel to go ahead,Neutral -carews freak goal leaves roma in ruins,Negative -cemeteries miss out on funds,Negative -code of conduct toughens organ donation regulations,Neutral -commonwealth bank cuts fixed home loan rates,Positive -community urged to help homeless youth,Positive -council chief executive fails to secure position,Negative -councillor to contest wollongong as independent,Positive -council moves to protect tas heritage garden,Positive -council welcomes ambulance levy decision,Positive -council welcomes insurance breakthrough,Positive -crean tells alp leadership critics to shut up,Negative -dargo fire threat expected to rise,Negative -death toll continues to climb in south korean subway,Negative -dems hold plebiscite over iraqi conflict,Positive -dent downs philippoussis in tie break thriller,Neutral -de villiers to learn fate on march 5,Neutral -digital tv will become commonplace summit,Neutral -direct anger at govt not soldiers crean urges,Negative -dispute over at smithton vegetable processing plant,Negative -dog mauls 18 month old toddler in nsw,Neutral -dying korean subway passengers phoned for help,Negative -england change three for wales match,Neutral -epa still trying to recover chemical clean up costs,Positive -expressions of interest sought to build livestock,Neutral -fed opp to re introduce national insurance,Neutral -firefighters contain acid spill,Negative -four injured in head on highway crash,Negative -freedom records net profit for third successive,Positive -funds allocated for domestic violence victims,Positive -funds allocated for youth at risk,Positive -funds announced for bridge work,Positive -funds to go to cadell upgrade,Positive -funds to help restore cossack,Positive -german court to give verdict on sept 11 accused,Neutral -gilchrist backs rest policy,Negative -girl injured in head on highway crash,Negative -gold coast to hear about bilby project,Neutral -golf club feeling smoking ban impact,Positive -govt is to blame for ethanols unpopularity opp,Negative -greens offer police station alternative,Positive -griffiths under fire over project knock back,Negative -group to meet in north west wa over rock art,Neutral -hacker gains access to eight million credit cards,Negative -hanson is grossly naive over nsw issues costa,Positive -hanson should go back where she came from nsw mp,Negative -harrington raring to go after break,Negative -health minister backs organ and tissue storage,Positive -heavy metal deposits survey nearing end,Positive -injured rios pulls out of buenos aires open,Neutral -inquest finds mans death accidental,Neutral -investigations underway into death toll of korean,Positive -investigation underway into elster creek spill,Negative -iraqs neighbours plead for continued un inspections,Negative -iraq to pay for own rebuilding white house,Positive -irish man arrested over omagh bombing,Positive -irrigators vote over river management,Positive -israeli forces push into gaza strip,Negative -jury to consider verdict in murder case,Positive -juvenile sex offenders unlikely to reoffend as,Positive -kelly disgusted at alleged bp ethanol scare,Neutral -kelly not surprised ethanol confidence low,Negative -korean subway fire 314 still missing,Negative -last minute call hands alinghi big lead,Positive -low demand forces air service cuts,Negative -man arrested after central qld hijack attempt,Positive -man charged over cooma murder,Positive -man fined after aboriginal tent embassy raid,Positive -man jailed over keno fraud,Positive -man with knife hijacks light plane,Negative -martin to lobby against losing nt seat in fed,Neutral -massive drug crop discovered in western nsw,Positive -mayor warns landfill protesters,Positive -meeting to consider tick clearance costs,Positive -meeting to focus on broken hill water woes,Positive -moderate lift in wages growth,Positive -more than 40 pc of young men drink alcohol at,Negative -more water restrictions predicted for northern tas,Negative -Petrol bombs and water cannons mark violent escalation in Hong Kong protests, negative \ No newline at end of file diff --git a/test/test.py b/test/test.py new file mode 100644 index 0000000..342c5c2 --- /dev/null +++ b/test/test.py @@ -0,0 +1,17 @@ +def train_classifier(self,headline): + + a=input("""\nIf you think the output was incorrect, Please re-label the headline's sentiment to train the classifier + & help improve future predictions [p/n]: """) + + if a=='p': + with open('positive_headlines.csv','a') as td: + td.write('\n'+headline) + elif a=='n': + with open('negative_headlines.csv','a') as td: + td.write('\n'+headline) + else: + print("Incorrect key pressed!") + pass + + +self.train_classifier(hdln) \ No newline at end of file