-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 79682cd
Showing
2 changed files
with
280 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,280 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### <b>Fake News Classifier model using TfidfVectorizer and PassiveAggressiveClassifier</b>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 40, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import seaborn as sb\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"from sklearn.model_selection import train_test_split\n", | ||
"from sklearn.feature_extraction.text import TfidfVectorizer\n", | ||
"from sklearn.linear_model import PassiveAggressiveClassifier" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 41, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#reading data into pandas dataframe\n", | ||
"df=pd.read_csv('news.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 42, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>Unnamed: 0</th>\n", | ||
" <th>title</th>\n", | ||
" <th>text</th>\n", | ||
" <th>label</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <td>0</td>\n", | ||
" <td>8476</td>\n", | ||
" <td>You Can Smell Hillary’s Fear</td>\n", | ||
" <td>Daniel Greenfield, a Shillman Journalism Fello...</td>\n", | ||
" <td>FAKE</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <td>1</td>\n", | ||
" <td>10294</td>\n", | ||
" <td>Watch The Exact Moment Paul Ryan Committed Pol...</td>\n", | ||
" <td>Google Pinterest Digg Linkedin Reddit Stumbleu...</td>\n", | ||
" <td>FAKE</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <td>2</td>\n", | ||
" <td>3608</td>\n", | ||
" <td>Kerry to go to Paris in gesture of sympathy</td>\n", | ||
" <td>U.S. Secretary of State John F. Kerry said Mon...</td>\n", | ||
" <td>REAL</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <td>3</td>\n", | ||
" <td>10142</td>\n", | ||
" <td>Bernie supporters on Twitter erupt in anger ag...</td>\n", | ||
" <td>— Kaydee King (@KaydeeKing) November 9, 2016 T...</td>\n", | ||
" <td>FAKE</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <td>4</td>\n", | ||
" <td>875</td>\n", | ||
" <td>The Battle of New York: Why This Primary Matters</td>\n", | ||
" <td>It's primary day in New York and front-runners...</td>\n", | ||
" <td>REAL</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" Unnamed: 0 title \\\n", | ||
"0 8476 You Can Smell Hillary’s Fear \n", | ||
"1 10294 Watch The Exact Moment Paul Ryan Committed Pol... \n", | ||
"2 3608 Kerry to go to Paris in gesture of sympathy \n", | ||
"3 10142 Bernie supporters on Twitter erupt in anger ag... \n", | ||
"4 875 The Battle of New York: Why This Primary Matters \n", | ||
"\n", | ||
" text label \n", | ||
"0 Daniel Greenfield, a Shillman Journalism Fello... FAKE \n", | ||
"1 Google Pinterest Digg Linkedin Reddit Stumbleu... FAKE \n", | ||
"2 U.S. Secretary of State John F. Kerry said Mon... REAL \n", | ||
"3 — Kaydee King (@KaydeeKing) November 9, 2016 T... FAKE \n", | ||
"4 It's primary day in New York and front-runners... REAL " | ||
] | ||
}, | ||
"execution_count": 42, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"#printing first 5 entries in the table/dataframe\n", | ||
"df.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 43, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#choosing the predictor (article text) and target (label) variables\n", | ||
"x=df['text']\n", | ||
"y=df['label']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 47, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#splitting data into train dataset and test dataset\n", | ||
"x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=0)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 48, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#fitting the TF-IDF vectorizer on the training text only, then transforming both train and test sets\n", | ||
"tf_vect=TfidfVectorizer(stop_words='english',max_df=0.6)\n", | ||
"tf_train=tf_vect.fit_transform(x_train) \n", | ||
"tf_test=tf_vect.transform(x_test)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 54, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#predicting labels of test data using PassiveAggressiveClassifier\n", | ||
"#tf_train / tf_test are the TF-IDF matrices built in the vectorization cell above\n", | ||
"passiveac=PassiveAggressiveClassifier(max_iter=60) #max_iter is the maximum number of passes over the training data\n", | ||
"passiveac.fit(tf_train,y_train)\n", | ||
"pred = passiveac.predict(tf_test)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 55, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#importing the evaluation metrics used in the cells below\n", | ||
"from sklearn import metrics\n", | ||
"from sklearn.metrics import classification_report,confusion_matrix,accuracy_score" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 56, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" precision recall f1-score support\n", | ||
"\n", | ||
" FAKE 0.94 0.93 0.93 933\n", | ||
" REAL 0.93 0.94 0.93 968\n", | ||
"\n", | ||
" accuracy 0.93 1901\n", | ||
" macro avg 0.93 0.93 0.93 1901\n", | ||
"weighted avg 0.93 0.93 0.93 1901\n", | ||
"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(classification_report(y_test, pred))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 57, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[[864 69]\n", | ||
" [ 60 908]]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(confusion_matrix(y_test,pred))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 58, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"0.932140978432404\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(accuracy_score(y_test,pred))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#Fake News classifier model " | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Binary file not shown.