Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
mithu975 authored May 31, 2020
0 parents commit 79682cd
Show file tree
Hide file tree
Showing 2 changed files with 280 additions and 0 deletions.
280 changes: 280 additions & 0 deletions Fake News Classifier.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### <b>Fake News Classifier model using TfidfVectorizer and PassiveAggressiveClassifier</b>"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sb\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.linear_model import PassiveAggressiveClassifier"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"#reading data into pandas dataframe\n",
"df=pd.read_csv('news.csv')"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>title</th>\n",
" <th>text</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>8476</td>\n",
" <td>You Can Smell Hillary’s Fear</td>\n",
" <td>Daniel Greenfield, a Shillman Journalism Fello...</td>\n",
" <td>FAKE</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>10294</td>\n",
" <td>Watch The Exact Moment Paul Ryan Committed Pol...</td>\n",
" <td>Google Pinterest Digg Linkedin Reddit Stumbleu...</td>\n",
" <td>FAKE</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>3608</td>\n",
" <td>Kerry to go to Paris in gesture of sympathy</td>\n",
" <td>U.S. Secretary of State John F. Kerry said Mon...</td>\n",
" <td>REAL</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>10142</td>\n",
" <td>Bernie supporters on Twitter erupt in anger ag...</td>\n",
" <td>— Kaydee King (@KaydeeKing) November 9, 2016 T...</td>\n",
" <td>FAKE</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>875</td>\n",
" <td>The Battle of New York: Why This Primary Matters</td>\n",
" <td>It's primary day in New York and front-runners...</td>\n",
" <td>REAL</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 title \\\n",
"0 8476 You Can Smell Hillary’s Fear \n",
"1 10294 Watch The Exact Moment Paul Ryan Committed Pol... \n",
"2 3608 Kerry to go to Paris in gesture of sympathy \n",
"3 10142 Bernie supporters on Twitter erupt in anger ag... \n",
"4 875 The Battle of New York: Why This Primary Matters \n",
"\n",
" text label \n",
"0 Daniel Greenfield, a Shillman Journalism Fello... FAKE \n",
"1 Google Pinterest Digg Linkedin Reddit Stumbleu... FAKE \n",
"2 U.S. Secretary of State John F. Kerry said Mon... REAL \n",
"3 — Kaydee King (@KaydeeKing) November 9, 2016 T... FAKE \n",
"4 It's primary day in New York and front-runners... REAL "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#printing first 5 entries in the table/dataframe\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"#deciding predicate and predicator variables\n",
"x=df['text']\n",
"y=df['label']"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"#splitting data into train dataset and test dataset\n",
"x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"#vectorization and fitting data \n",
"tf_vect=TfidfVectorizer(stop_words='english',max_df=0.6)\n",
"tf_train=tf_vect.fit_transform(x_train) \n",
"tf_test=tf_vect.transform(x_test)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"#predicting lables of test data using PassiveAgrressiveClassifier\n",
"passiveac=PassiveAggressiveClassifier(max_iter=60) #more the value of max_iter, more accuracy can be obtained\n",
"passiveac.fit(tfidf_train,y_train)\n",
"pred = passiveac.predict(tfidf_test)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"#calculating accuracy\n",
"from sklearn import metrics\n",
"from sklearn.metrics import classification_report,confusion_matrix,accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" FAKE 0.94 0.93 0.93 933\n",
" REAL 0.93 0.94 0.93 968\n",
"\n",
" accuracy 0.93 1901\n",
" macro avg 0.93 0.93 0.93 1901\n",
"weighted avg 0.93 0.93 0.93 1901\n",
"\n"
]
}
],
"source": [
"print(classification_report(y_test, pred))"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[864 69]\n",
" [ 60 908]]\n"
]
}
],
"source": [
"print(confusion_matrix(y_test,pred))"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.932140978432404\n"
]
}
],
"source": [
"print(accuracy_score(y_test,pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Fake News classifier model "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added news.zip
Binary file not shown.

0 comments on commit 79682cd

Please sign in to comment.