diff --git a/MachineLearning/Session1/AlWaseet Listing.ods b/MachineLearning/Session1/AlWaseet Listing.ods new file mode 100644 index 0000000..4e30f3d Binary files /dev/null and b/MachineLearning/Session1/AlWaseet Listing.ods differ diff --git a/MachineLearning/Session1/AlWaseet.csv b/MachineLearning/Session1/AlWaseet.csv new file mode 100644 index 0000000..c30955d --- /dev/null +++ b/MachineLearning/Session1/AlWaseet.csv @@ -0,0 +1,73 @@ +PRICE,ORIGIN,MAKE,MODEL,YEAR,MILEAGE,COLOR,HYBRID,DIESEL,4WD,CONVERTIBLE,LEATHER,AUTOTRANSMISSION,AUTOWINDOWS,AC,HEATEDSEATS,CHASSIS_FR,CHASSIS_FL,CHASSIS_RR,CHASSIS_RL +27500,JP,TOYOTA,PRADO,2009,80000,BLACK,0,0,1,0,0,1,1,1,0,0,0,0,0 +10700,FR,CITROEN,C4,2007,65000,BLACK,0,0,0,0,0,0,1,1,0,0,0,0,0 +32000,JP,LEXUS,GS450,2007,90000,,1,0,0,0,0,1,1,1,0,0,0,0,0 +8700,US,CHEVROLET,AVEO,2009,60000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0 +18000,US,CHEVROLET,TRAILBLAZER,2009,67000,BLUE,0,0,1,0,0,1,1,1,0,0,0,0,0 +17000,US,FORD,MUSTANG,2002,92000,NAVY BLUE,0,0,0,0,0,0,1,1,0,0,0,0,0 +9100,KR,HYUNDAI,I10,2013,12000,VINOUS,0,0,0,0,0,0,1,1,0,0,0,0,0 +12700,KR,HYUNDAI,AVANTI,2010,27000,SILVER,0,0,0,0,0,1,1,1,1,0,0,0,0 +11850,KR,HYUNDAI,AVANTI,2008,37000,SILVER,0,0,0,0,0,1,1,1,1,0,0,0,0 +20500,KR,HYUNDAI,SONATA,2011,20000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0 +65000,US,JEEP,CHEROKEE,2012,12000,SILVER,0,0,1,0,0,1,1,1,0,0,0,0,0 +13500,KR,KIA,CERATO,2010,27000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +14300,KR,KIA,SPORTAGE,2008,47000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +32500,DE,MERCEDES,C200,2009,64000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0 +22500,DE,MERCEDES,C180,2005,74000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +17200,JP,NISSAN,ALTIMA,2008,65000,BLACK,0,0,0,0,1,1,1,1,0,0,0,0,0 +18800,DE,OPEL,ASTRA,2011,24000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0 +11400,FR,PEUGEOT,308,2008,66000,GREEN,0,0,0,0,0,1,1,1,0,0,0,0,0 +7000,FR,PEUGEOT,107,2009,39000,YELLOW,0,0,0,0,1,1,1,1,0,0,0,0,0 +28000,FR,PEUGEOT,508,2013,10000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0 +7000,FR,PEUGEOT,206,2008,40000,BLACK,0,0,0,0,0,0,1,1,0,0,0,0,0 +32500,DE,MERCEDES,C200,2009,64000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0 +65000,DE,BMW,X6,2011,5200,BLACK,0,0,0,0,0,1,1,1,0,0,0,0,0 +24000,DE,BMW,320,2007,77000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0 +18500,DE,V.W,GOLF,2012,15000,BLACK,0,0,0,0,0,1,1,1,0,0,0,0,0 +14000,DE,V.W,GOLF,2007,95000,NAVY BLUE METALLIC,0,0,0,0,1,1,1,1,0,0,0,0,0 +9500,JP,HONDA,CIVIC,2006,11906,CHAMPAIGNE,1,0,0,0,0,1,1,1,0,1,0,0,0 +12500,JP,HONDA,CIVIC,2012,32180,,1,0,0,0,0,1,1,1,0,0,0,1,0 +22500,JP,TOYOTA,CAMRY,2011,48270,,1,0,0,0,0,1,1,1,0,0,0,0,0 +14000,JP,TOYOTA,COROLLA,2006,160000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +18000,JP,TOYOTA,PRIUS,2010,152855,WHITE,1,0,0,0,0,1,1,1,0,0,0,0,0 +14500,JP,TOYOTA,COROLLA,2010,49000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0 +27300,JP,TOYOTA,CAMRY,2012,46017,BLUE,1,0,0,0,0,1,1,1,0,0,0,0,0 +14500,JP,TOYOTA,CAMRY,2008,94931,CHAMPAIGNE,1,0,0,0,0,1,1,1,0,0,0,0,0 +29750,JP,TOYOTA,CAMRY,2013,5000,BLACK,1,0,0,0,0,1,1,1,0,0,0,0,0 +16300,JP,NISSAN,ALTIMA,2009,60000,BLACK,1,0,0,0,1,1,1,1,0,0,0,0,0 +14850,KR,HYUNDAI,AVANTI,2011,39000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +37900,US,JEEP,CHEROKEE,2012,15000,BLACK,0,0,0,0,1,1,1,1,0,0,0,0,0 +16750,DE,AUDI,A4,2006,90000,NAVY BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0 +47500,DE,AUDI,A5,2011,33000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0 +18000,US,CHEVROLET,TRAILBLAZER,2009,67000,BLACK,0,0,1,0,0,1,1,1,0,0,0,0,0 +20500,JP,HONDA,ACCORD,2008,168000,CHAMPAIGNE,0,0,0,0,0,1,1,1,0,0,0,0,0 +10200,JP,HONDA,CIVIC,2008,88495,SILVER,1,0,0,0,0,1,1,1,0,0,0,0,0 +12250,KR,HYUNDAI,SONATA,2008,82000,SILVER BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0 +14850,KR,HYUNDAI,AVANTI,2011,39000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +13200,KR,HYUNDAI,TUCSON,2006,102000,GREEN,0,0,1,0,1,1,1,1,0,0,0,0,0 +29800,US,HUMMER,H3,2007,75623,,0,0,1,0,0,1,1,1,0,0,0,0,0 +65000,US,JEEP,CHEROKEE,2012,12000,SILVER,0,0,1,0,0,1,1,1,0,0,0,0,0 +10250,KR,KIA,RIO,2008,43000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +9600,KR,KIA,RIO,2009,48000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +7500,KR,KIA,MORNING,2008,55000,BLUE,0,0,0,0,0,0,1,1,0,0,0,0,0 +9000,KR,KIA,PICANTO,2011,17000,,0,0,0,0,0,1,1,1,0,0,0,0,0 +28000,UK,LANDROVER,LR2,2007,83000,BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0 +37500,DE,MERCEDES,E200,2008,54000,CHAMPAIGNE,0,0,0,0,0,1,1,1,0,0,0,0,0 +35000,DE,MERCEDES,E200,2008,82000,BLACK,0,0,0,0,0,1,1,1,0,0,0,0,0 +19000,DE,MERCEDES,ML500,2003,106194,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +10200,JP,MITSUBISHI,LANCER,2010,40000,VINOUS,0,0,0,0,1,1,1,1,0,0,0,0,0 +12000,JP,NISSAN,X-TRAIL,2003,85000,CHAMPAIGNE,0,0,0,0,0,1,1,1,0,0,0,0,0 +16200,JP,NISSAN,ALTIMA,2009,60000,BLACK,1,0,0,0,1,1,1,1,0,0,0,0,0 +12000,JP,NISSAN,SUNNY,2010,45000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0 +8200,DE,OPEL,ASTRA,2001,51000,SILVER BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0 +17000,JP,TOYOTA,PRIUS,2010,32000,BLACK,1,0,0,0,0,1,1,1,0,0,0,0,0 +16450,JP,TOYOTA,CAMRY,2008,128720,GREEN,0,0,0,0,1,1,1,1,0,0,0,0,0 +117500,JP,TOYOTA,CAMRY,2009,74000,SILVER,1,0,0,0,0,1,1,1,0,0,0,0,0 +16800,JP,TOYOTA,CAMRY,2009,60000,SILVER,1,0,0,0,1,1,1,1,0,0,0,0,0 +13000,JP,TOYOTA,PRIUS,2008,85277,RED,1,0,0,0,0,1,1,1,0,0,0,0,0 +26500,DE,V.W,TOUAREG,2008,67000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0 +18500,DE,V.W,GOLF,2012,15000,BLACK,0,0,0,0,1,1,1,1,0,0,0,0,0 +36000,UK,LANDROVER,RANGE ROVER,2004,120000,NAVY BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0 +33500,JP,LEXUS,GS450H,2008,37007,WHITE,1,0,0,0,0,1,1,1,0,0,0,0,0 +26000,JP,LEXUS,IS300,2007,90000,NAVY GREEN,0,0,0,0,0,1,1,1,0,0,0,0,0 +21500,JP,LEXUS,HS250H,2010,26870,VINOUS,1,0,0,0,0,1,1,1,0,0,0,0,0 diff --git a/MachineLearning/Session1/Deck.pdf b/MachineLearning/Session1/Deck.pdf new file mode 100644 index 0000000..e40e684 Binary files /dev/null and b/MachineLearning/Session1/Deck.pdf differ diff --git a/MachineLearning/Session1/regression.py b/MachineLearning/Session1/regression.py new file mode 100644 index 0000000..4ca3bbc --- /dev/null +++ b/MachineLearning/Session1/regression.py @@ -0,0 +1,79 @@ +#This is meant for an interactive Python shell +#Tested on jupyter +#Created by abulyomon@gmail.com for JOSA's Data Science Bootcamp + +#Load the data +import pandas +cars = pandas.read_csv("AlWaseet.csv") +#Take a look +cars.describe() + +#Visualize +import matplotlib.pyplot as plt +plt.xlabel("Milage in KM") +plt.ylabel("Price in JOD") +plt.xlim([0,200000]) + +plt.scatter(cars['MILEAGE'], cars['PRICE']) +plt.show() +#Do they relate? +cars['PRICE'].corr(cars['MILEAGE']) +#FYI +cars['MILEAGE'].corr(cars['PRICE']) +#Let's regress +import statsmodels.formula.api as stats + +formula1 = 'PRICE ~ MILEAGE' +model1 = stats.ols(formula1, data = cars).fit() +model1.summary() + +#Remove outlier +cars = cars[cars.PRICE < max(cars.PRICE)] +model1v2 = stats.ols(formula1, data = cars).fit() +model1v2.summary() + +#Plot again +plt.xlabel("Mileage in KM") +plt.ylabel("Price in JOD") +plt.xlim([0,200000]) +plt.scatter(cars['MILEAGE'], cars['PRICE']) +plt.plot(cars['MILEAGE'], model1v2.predict(cars)) +plt.show() + +#### More variables +import datetime +cars['AGE'] = datetime.date.today().year - cars['YEAR'] #Guess what, ignore the warning! +cars.describe() + +plt.xlabel("Age in Years") +plt.ylabel("Price in JOD") +plt.xlim([0,20]) +plt.scatter(cars['AGE'], cars['PRICE']) + +cars['AGE'].corr(cars['PRICE']) + +formula2 = 'PRICE ~ MILEAGE + AGE' +model2 = stats.ols(formula2, data = cars).fit() +model2.summary() + +formula3 = 'PRICE ~ MILEAGE + AGE + CHASSIS_FR + CHASSIS_FL \ + + CHASSIS_RR + CHASSIS_RL' +model3 = stats.ols(formula3, data = cars).fit() +model3.summary() + +cars['CHASSIS'] = cars['CHASSIS_FR'] + cars['CHASSIS_FL'] + cars['CHASSIS_RR'] \ ++ cars['CHASSIS_RL'] #Ok, yes ignore the warning, again! + +formula3 = 'PRICE ~ MILEAGE + AGE + CHASSIS' +model3 = stats.ols(formula3, data = cars).fit() +model3.summary() + +cars['MILEAGE'].corr(cars['AGE']) +#DAMN! +plt.xlim([0,200000]) +plt.scatter(cars['MILEAGE'],cars['AGE']) +plt.show() + +formula4 = 'PRICE ~ AGE + CHASSIS' +model4 = stats.ols(formula4, data=cars).fit() +model4.summary()