Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create anti-malware application that uses ML, wikipedia bot, WikiLLM and RAG boilerplate #2380

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Python/Anti-Malware_application/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# An ML-based anti-malware program written in Python #

<p> I first created this project for a competition. I had to learn a lot of new things and took inspiration (and a bit of code) from others who had tried similar projects before. This project uses customtkinter and scikit-learn to handle the GUI and ML operations, respectively. It uses a decision tree classifier to classify files as malware or benign. It can classify a single file or all the files in a folder. It currently only works with executable (PE) files, but can be modified to work with other file types. Using the pefile library, the program extracts information from the executable file(s) to classify them. </p>


Note: It is not perfect and is prone to many false positives, but I hardly encountered any false negatives. I think this is due to overfitting of the model.

### Installation and execution

1) Install the necessary libraries
```
pip3 install customtkinter
# tkinter ships with Python and is not installed through pip
# (on Debian/Ubuntu: sudo apt install python3-tk)
pip3 install pandas
pip3 install scikit-learn
pip3 install pefile
pip3 install numpy

```

2) Execute the program
```
python3 anti-malware.py
```

Happy learning!
241 changes: 241 additions & 0 deletions Python/Anti-Malware_application/anti-malware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
#################################################################################
### Author: Pyerie #
### Application: A not-so-accurate ML based anti-malware solution #
#################################################################################

print("[+] Loading.... ")
import customtkinter
from tkinter.filedialog import *
from tkinter import *
import pefile
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
import os



# Train the classifier once at start-up. Every column of the CSV except
# 'legitimate' is used as a feature; 'legitimate' is the class label.
dataset = pd.read_csv('database3.csv')
X = dataset.drop(['legitimate'], axis=1).values
y = dataset['legitimate'].values

# Hold out 30% of the rows to estimate accuracy; the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# One fit is enough: refitting on the same training data in a loop only
# repeats the work.
clf = DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
res1 = clf.predict(X_test)

# Format the score as a real percentage. Slicing str(score) mangles values
# such as 1.0 (printed "0%") or 0.9 (printed "9%").
test_accuracy = metrics.accuracy_score(y_test, res1)
accuracy = f"{test_accuracy:.2%}"
print("Accuracy: " + accuracy)


# Look and feel shared by every customtkinter widget created afterwards.
customtkinter.set_default_color_theme("dark-blue")
customtkinter.set_appearance_mode("dark")

# Main application window, sized to the full screen resolution.
window = Tk()
window.title("eSuraksha")
window.configure(bg="#121212")
screen_width = window.winfo_screenwidth()
screen_height = window.winfo_screenheight()
window.geometry(f"{screen_width}x{screen_height}")
class NotPEFileError(UnboundLocalError):
    """Raised by extract_features() when the input is not a valid PE file.

    It derives from UnboundLocalError because that is the exception type the
    scan handlers in this file catch to detect a non-PE input.
    """


def extract_features(file):
    """Return the PE-header values that make up one feature vector.

    Args:
        file: Path of the file to inspect.

    Returns:
        A list of 14 integers read from the PE headers, in a fixed order
        (presumably the order of the feature columns in the training CSV --
        verify against the dataset).

    Raises:
        NotPEFileError: If pefile rejects the file as malformed / not PE.
            "Not PE file!" is also printed to the console.
        IndexError: If the file has fewer data-directory entries than the
            ones indexed unconditionally below (0, 2 and 6).
    """
    try:
        # fast_load skips parsing of the directory contents; only the header
        # fields read below are needed.
        pe_obj = pefile.PE(file, fast_load=True)
    except pefile.PEFormatError as error:
        print("Not PE file!")
        raise NotPEFileError("Not PE file!") from error

    try:
        optional_header = pe_obj.OPTIONAL_HEADER
        directories = optional_header.DATA_DIRECTORY

        # Data-directory indexes per the PE format: 0 = export table,
        # 2 = resource table, 6 = debug, 12 = import address table.
        # Entry 12 may be absent in files with a short directory table;
        # treat that as address 0.
        try:
            directory_12_address = directories[12].VirtualAddress
        except IndexError:
            directory_12_address = 0

        return [
            directories[6].Size,
            directories[6].VirtualAddress,
            optional_header.MajorImageVersion,
            optional_header.MajorOperatingSystemVersion,
            directories[0].VirtualAddress,
            directories[0].Size,
            directory_12_address,
            directories[2].Size,
            optional_header.MajorLinkerVersion,
            pe_obj.FILE_HEADER.NumberOfSections,
            optional_header.SizeOfStackReserve,
            optional_header.DllCharacteristics,
            optional_header.AddressOfEntryPoint,
            optional_header.ImageBase,
        ]
    finally:
        # pefile keeps the file memory-mapped until it is closed explicitly.
        pe_obj.close()

# Track whether the single-file / directory result windows have been created.
# Both start as the boolean False; the scan handlers later store and compare
# the strings "True" / "False" in these names.
toplevel_created = toplevel2_created = False

def single_file():
    """Ask for one file, classify it and show the verdict in a new window.

    Any result window left over from a previous scan is closed first. If the
    file dialog is cancelled, the freshly opened window is closed again and
    nothing is scanned.
    """
    global toplevel_created
    global toplevel2_created
    global single_file_top

    # Only one result window is kept open at a time. Once set, the flags hold
    # the strings "True"/"False", which is what both scan handlers compare
    # against.
    if toplevel_created == "True":
        single_file_top.destroy()
        toplevel_created = "False"
    if toplevel2_created == "True":
        many_files.destroy()
        toplevel2_created = "False"

    single_file_top = Toplevel(window)
    single_file_top.geometry("350x200")
    customtkinter.set_appearance_mode("dark")
    customtkinter.set_default_color_theme("dark-blue")
    single_file_top['bg'] = "#121212"
    single_file_top.title("Scan a single file")
    toplevel_created = "True"
    result = customtkinter.CTkLabel(single_file_top, text="Loading...")
    result.pack()

    file_path = askopenfilename()
    if not file_path:
        # Dialog cancelled: there is nothing to scan, so do not leave a
        # window stuck on "Loading...".
        single_file_top.destroy()
        toplevel_created = "False"
        return

    try:
        features_extracted = extract_features(str(file_path))
    except UnboundLocalError:
        # extract_features surfaces a non-PE input as UnboundLocalError.
        result.destroy()
        not_pe_l = customtkinter.CTkLabel(single_file_top, text="Not PE file!")
        not_pe_l.pack()
        return

    # The classifier expects a 2-D array: one row per sample.
    data_of_sample = np.array(features_extracted).reshape(1, -1)
    prediction = clf.predict(data_of_sample)[0]

    # NOTE(review): the training label column is called 'legitimate', yet
    # class 1 is reported as malware here -- confirm the dataset's encoding.
    if prediction == 1:
        result.destroy()
        malware_l = customtkinter.CTkLabel(single_file_top, fg_color="red", text="ML model detected malware!")
        malware_l.pack()
    elif prediction == 0:
        result.destroy()
        benign_l = customtkinter.CTkLabel(single_file_top, fg_color="green", text="No malware detected!")
        benign_l.pack()

# File-name suffixes treated as PE candidates during a directory scan.
_PE_EXTENSIONS = (
    ".acm", ".ax", ".cpl", ".dll", ".drv", ".efi", ".exe",
    ".mui", ".ocx", ".scr", ".sys", ".tsp", ".bin",
)


def scan_many():
    """Ask for a directory, classify every PE candidate in it and list hits.

    The directory is walked recursively. Files whose name ends with one of
    the PE suffixes (case-insensitively) are classified; files that turn out
    not to be PE, or that cannot be read, are skipped. Any result window left
    over from a previous scan is closed first. If the directory dialog is
    cancelled, the freshly opened window is closed again.
    """
    global toplevel2_created
    global toplevel_created
    global many_files

    # Only one result window is kept open at a time. Once set, the flags hold
    # the strings "True"/"False", which is what both scan handlers compare
    # against.
    if toplevel2_created == "True":
        many_files.destroy()
        toplevel2_created = "False"
    if toplevel_created == "True":
        single_file_top.destroy()
        toplevel_created = "False"

    many_files = Toplevel(window)
    many_files.geometry("350x200")
    customtkinter.set_appearance_mode("dark")
    customtkinter.set_default_color_theme("dark-blue")
    many_files['bg'] = "#121212"
    many_files.title("Scan a directory")
    toplevel2_created = "True"
    result2 = customtkinter.CTkLabel(many_files, text="Loading...")
    result2.pack()

    directory = askdirectory()
    if not directory:
        # Dialog cancelled: nothing was scanned, so do not claim a clean
        # result.
        many_files.destroy()
        toplevel2_created = "False"
        return

    malware_many = []
    for root, _subdirs, files in os.walk(str(directory)):
        for name_of_file in files:
            if not name_of_file.lower().endswith(_PE_EXTENSIONS):
                continue
            path = os.path.join(root, name_of_file)

            try:
                features_of_many = extract_features(path)
            except UnboundLocalError:
                # extract_features surfaces a non-PE input as
                # UnboundLocalError. Skip the file rather than classify it
                # with another file's features.
                continue
            except OSError as error:
                # The file vanished or cannot be stat'ed; keep scanning.
                print("[!] Skipping " + path + ": " + str(error))
                continue

            # The classifier expects a 2-D array: one row per sample.
            data_for_many = np.array(features_of_many).reshape(1, -1)

            # NOTE(review): the training label column is called 'legitimate',
            # yet class 1 is reported as malware here -- confirm the
            # dataset's encoding.
            if clf.predict(data_for_many)[0] == 1:
                malware_many.append(path)

    result2.destroy()
    if malware_many:
        malware_label2 = customtkinter.CTkLabel(many_files, text="Malware found: ")
        malware_label2.pack()
        malware_text_box = customtkinter.CTkTextbox(many_files)

        # One path per line, each followed by a separator line of its own.
        separator = '------------------------------------------'
        for_text_box = "".join(
            name_of_malware + "\n" + separator + "\n"
            for name_of_malware in malware_many
        )

        malware_text_box.insert('0.0', for_text_box)
        malware_text_box.configure(state="disabled")
        malware_text_box.pack()
    else:
        benign_label = customtkinter.CTkLabel(many_files, text="No malware found!")
        benign_label.pack()

# Entry points of the UI: one button per scan mode, stacked in the main window.
button1 = customtkinter.CTkButton(
    master=window,
    text="Scan a single file",
    command=single_file,
)
button2 = customtkinter.CTkButton(
    master=window,
    text="Scan a folder",
    command=scan_many,
)
button1.pack()
button2.pack()

# Hand control to Tk; this call blocks until the main window is closed.
window.mainloop()
Loading