fnapp.py
import streamlit as st
import joblib
import numpy as np
import spacy
import pandas as pd
from itertools import zip_longest
# Add dependency parsing visualization
from spacy import displacy
# Add language translation feature
from googletrans import Translator, LANGUAGES
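# NOTE: googletrans is an unofficial Google Translate client; in practice the
# 4.0.0rc1 release is usually needed for Translator() to work reliably.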
# For WordCloud and others:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
# Load the models and vectorizer
spacy_nlp = spacy.load('en_core_web_sm') # to load an English-based model
model = joblib.load('model.pkl')
vectorizer = joblib.load('vectorizer.pkl')
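# model.pkl and vectorizer.pkl are assumed to be a scikit-learn
# PassiveAggressiveClassifier and its TfidfVectorizer, trained offline in a
# separate script roughly like this (a minimal sketch; train_texts and
# train_labels are hypothetical names, not part of this file):
#
#   from sklearn.feature_extraction.text import TfidfVectorizer
#   from sklearn.linear_model import PassiveAggressiveClassifier
#   vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
#   model = PassiveAggressiveClassifier(max_iter=50)
#   model.fit(vectorizer.fit_transform(train_texts), train_labels)
#   joblib.dump(model, 'model.pkl')
#   joblib.dump(vectorizer, 'vectorizer.pkl')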
# Get the language codes for the translation dropdown
language_codes = list(LANGUAGES.keys())
translator = Translator()
# Function to make predictions
@st.cache_data  # on Streamlit versions older than 1.18, use @st.cache instead
def predict_fake_news(title, text):
    try:
        tf_idf = vectorizer.transform([f'{title} {text}'])
        prediction = model.predict(tf_idf)
        # A PassiveAggressiveClassifier has no predict_proba, so map its signed
        # decision_function scores through a sigmoid to get pseudo-probabilities.
        decision_vals = model.decision_function(tf_idf)
        positive = 1. / (1. + np.exp(-decision_vals))  # sigmoid of the margin
        negative = 1. - positive
        probs = np.stack((negative, positive), axis=1)
        # Report the probability of the predicted class as the certainty
        return prediction, float(np.max(probs[0]))
    except Exception as e:
        return "An error occurred during prediction:", str(e)
# Streamlit app
st.title("MaLeNAD: ML-Powered News Article Analysis and Fake News Detection App")
st.subheader('Analyse news articles and discern fact from fiction with this tool powered by machine learning!')
choices = ["Fake News Prediction", "Natural Language Processing", "Language Translation"]
st.sidebar.title("Welcome! :blush:")
choice = st.sidebar.selectbox("What do you want to do today?", choices)
if choice == "Fake News Prediction":
st.info("You are now utilizing the Fake News Prediction module. Please note that predictions may not be 100% accurate.")
# Input form for users
title_input = st.text_input("Enter the title of the news article:")
text_input = st.text_area("Enter the text of the news article:")
# Make prediction when the user clicks the button
if st.button("Predict"):
with st.spinner("Processing, please wait..."):
if title_input and text_input:
prediction, probability = predict_fake_news(title_input, text_input)
st.success("We can predict that this news article is {}, with {:.2f}% certainty.".format(prediction[0], probability[0] * 100))
else:
st.error("Please enter all required text. Thank you.")
elif choice == "Language Translation":
st.info("You are now utilizing the Language Translation module, powered by Google Translate. Please note that translations may not be 100% accurate.")
news_text = st.text_area("Enter your news article text:")
target_language = st.selectbox("Select target language:", language_codes, format_func=lambda x: LANGUAGES[x])
# Format the entire list of language names
formatted_language_names = [LANGUAGES[code] for code in language_codes]
if st.button("Translate!"):
with st.spinner("Translating, please wait..."):
if news_text:
translated_text = translator.translate(text = news_text, dest=target_language).text
st.subheader(f"Translated Text ({target_language}):")
st.text_area("Translated Text:", translated_text)
else:
st.error(f"Please enter the required text. Thank you.")
else:
    st.info("You are now utilizing the Natural Language Processing module.")
    news_text = st.text_area("Enter your news article text:")
    tasks = ["Tokenization", "Dependency Parsing Visualization", "Named Entity Recognition", "Lemmatization", "Parts of Speech (POS) Tagging"]
    task_choice = st.selectbox("What do you want to do here?", tasks)
    if st.button("Process Text!"):
        with st.spinner("Processing, please wait..."):
            if news_text:
                doc = spacy_nlp(news_text)
                if task_choice == "Dependency Parsing Visualization":
                    st.subheader("Dependency Parsing Visualization")
                    st.write("Here is the dependency parsing tree for the input text:")
                    # displacy.render returns an SVG string, so embed it as
                    # HTML rather than passing it to st.image
                    rendition = displacy.render(doc, style="dep", options={'compact': True, 'distance': 100})
                    st.markdown(rendition, unsafe_allow_html=True)
                elif task_choice == "Tokenization":
                    result = [token.text for token in doc]
                    st.json(result)
                elif task_choice == "Named Entity Recognition":
                    result = [(ent.text, ent.label_) for ent in doc.ents]
                    st.json(result)
                elif task_choice == "Lemmatization":
                    result = ["Token:{}, Lemma:{}".format(token.text, token.lemma_) for token in doc]
                    st.json(result)
                elif task_choice == "Parts of Speech (POS) Tagging":
                    result = ["Token:{}, POS:{}, Dependency:{}".format(token.text, token.tag_, token.dep_) for token in doc]
                    st.json(result)
            else:
                st.error("Please enter the required text. Thank you.")
if st.button("Tabulate Information!"):
with st.spinner("Tabulating, please wait..."):
# Add a slider for selecting the number of rows to display
num_rows = st.slider("Select the number of rows to display:", min_value=1, max_value=100, value=10)
doc = spacy_nlp(news_text)
tokens = [token.text for token in doc]
entities = [(ent.label_) for ent in doc.ents]
tags = [(token.tag_) for token in doc]
# Create a list of entity labels for each token
# This part of the code creates a list (entity_labels_for_tokens) where each element corresponds to a token.
# It iterates through the named entities (doc.ents) and assigns the entity label to the appropriate range of tokens.
entity_labels_for_tokens = [""] * len(tokens)
for ent in doc.ents:
for i in range(ent.start, ent.end):
entity_labels_for_tokens[i] = ent.label_
# Use zip_longest to ensure all columns have the same length
zipped_data = zip_longest(tokens, entity_labels_for_tokens, tags, fillvalue="")
# Create a DataFrame linking all three sets of information together:
df = pd.DataFrame(zipped_data, columns = ["Tokens", "Lemma", "POS Tags"])
st.dataframe(df.head(num_rows))
    if st.checkbox("Visualize a WordCloud"):
        with st.spinner("Visualizing, please wait..."):
            if news_text:  # WordCloud.generate raises on empty input
                wc = WordCloud().generate(news_text)
                fig, ax = plt.subplots()
                ax.imshow(wc, interpolation="bilinear")
                ax.axis("off")
                # Pass the figure explicitly rather than relying on the
                # deprecated global-pyplot behaviour
                st.pyplot(fig)
            else:
                st.error("Please enter the required text. Thank you.")
footer="""<style>
a:link , a:visited{
color: blue;
background-color: transparent;
text-decoration: none;
}
a:hover, a:active {
color: red;
background-color: transparent;
text-decoration: none;
}
div[data-testid="stHorizontalBlock"] div[role="slider"] {
background-color: #3498db;
}
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: white;
color: black;
text-align: center;
}
</style>
<div class="footer">
<p>Developed with ML and ❤ by <a style='display: block; text-align: center;' href="https://www.github.com/samuelbolugee" target="_blank">S. B. Olugunna</a></p>
</div>
"""
st.markdown(footer, unsafe_allow_html=True)
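# To launch the app locally (assuming Streamlit and the pickled model files
# are available in the working directory):
#
#   streamlit run fnapp.py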