Skip to content

Commit

Permalink
Merge pull request zaidfarekh#2 from abulyomon/master
Browse files Browse the repository at this point in the history
Initial files - ML Session by Yaman
  • Loading branch information
mjalajel committed Dec 19, 2015
2 parents 9d5d37c + 069def6 commit 953400d
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 0 deletions.
Binary file added MachineLearning/Session1/AlWaseet Listing.ods
Binary file not shown.
73 changes: 73 additions & 0 deletions MachineLearning/Session1/AlWaseet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
PRICE,ORIGIN,MAKE,MODEL,YEAR,MILEAGE,COLOR,HYBRID,DIESEL,4WD,CONVERTIBLE,LEATHER,AUTOTRANSMISSION,AUTOWINDOWS,AC,HEATEDSEATS,CHASSIS_FR,CHASSIS_FL,CHASSIS_RR,CHASSIS_RL
27500,JP,TOYOTA,PRADO,2009,80000,BLACK,0,0,1,0,0,1,1,1,0,0,0,0,0
10700,FR,CITROEN,C4,2007,65000,BLACK,0,0,0,0,0,0,1,1,0,0,0,0,0
32000,JP,LEXUS,GS450,2007,90000,,1,0,0,0,0,1,1,1,0,0,0,0,0
8700,US,CHEVROLET,AVEO,2009,60000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0
18000,US,CHEVROLET,TRAILBLAZER,2009,67000,BLUE,0,0,1,0,0,1,1,1,0,0,0,0,0
17000,US,FORD,MUSTANG,2002,92000,NAVY BLUE,0,0,0,0,0,0,1,1,0,0,0,0,0
9100,KR,HYUNDAI,I10,2013,12000,VINOUS,0,0,0,0,0,0,1,1,0,0,0,0,0
12700,KR,HYUNDAI,AVANTI,2010,27000,SILVER,0,0,0,0,0,1,1,1,1,0,0,0,0
11850,KR,HYUNDAI,AVANTI,2008,37000,SILVER,0,0,0,0,0,1,1,1,1,0,0,0,0
20500,KR,HYUNDAI,SONATA,2011,20000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0
65000,US,JEEP,CHEROKEE,2012,12000,SILVER,0,0,1,0,0,1,1,1,0,0,0,0,0
13500,KR,KIA,CERATO,2010,27000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
14300,KR,KIA,SPORTAGE,2008,47000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
32500,DE,MERCEDES,C200,2009,64000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0
22500,DE,MERCEDES,C180,2005,74000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
17200,JP,NISSAN,ALTIMA,2008,65000,BLACK,0,0,0,0,1,1,1,1,0,0,0,0,0
18800,DE,OPEL,ASTRA,2011,24000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0
11400,FR,PEUGEOT,308,2008,66000,GREEN,0,0,0,0,0,1,1,1,0,0,0,0,0
7000,FR,PEUGEOT,107,2009,39000,YELLOW,0,0,0,0,1,1,1,1,0,0,0,0,0
28000,FR,PEUGEOT,508,2013,10000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0
7000,FR,PEUGEOT,206,2008,40000,BLACK,0,0,0,0,0,0,1,1,0,0,0,0,0
32500,DE,MERCEDES,C200,2009,64000,GRAY,0,0,0,0,0,1,1,1,0,0,0,0,0
65000,DE,BMW,X6,2011,5200,BLACK,0,0,0,0,0,1,1,1,0,0,0,0,0
24000,DE,BMW,320,2007,77000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0
18500,DE,V.W,GOLF,2012,15000,BLACK,0,0,0,0,0,1,1,1,0,0,0,0,0
14000,DE,V.W,GOLF,2007,95000,NAVY BLUE METALLIC,0,0,0,0,1,1,1,1,0,0,0,0,0
9500,JP,HONDA,CIVIC,2006,11906,CHAMPAIGNE,1,0,0,0,0,1,1,1,0,1,0,0,0
12500,JP,HONDA,CIVIC,2012,32180,,1,0,0,0,0,1,1,1,0,0,0,1,0
22500,JP,TOYOTA,CAMRY,2011,48270,,1,0,0,0,0,1,1,1,0,0,0,0,0
14000,JP,TOYOTA,COROLLA,2006,160000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
18000,JP,TOYOTA,PRIUS,2010,152855,WHITE,1,0,0,0,0,1,1,1,0,0,0,0,0
14500,JP,TOYOTA,COROLLA,2010,49000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0
27300,JP,TOYOTA,CAMRY,2012,46017,BLUE,1,0,0,0,0,1,1,1,0,0,0,0,0
14500,JP,TOYOTA,CAMRY,2008,94931,CHAMPAIGNE,1,0,0,0,0,1,1,1,0,0,0,0,0
29750,JP,TOYOTA,CAMRY,2013,5000,BLACK,1,0,0,0,0,1,1,1,0,0,0,0,0
16300,JP,NISSAN,ALTIMA,2009,60000,BLACK,1,0,0,0,1,1,1,1,0,0,0,0,0
14850,KR,HYUNDAI,AVANTI,2011,39000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
37900,US,JEEP,CHEROKEE,2012,15000,BLACK,0,0,0,0,1,1,1,1,0,0,0,0,0
16750,DE,AUDI,A4,2006,90000,NAVY BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0
47500,DE,AUDI,A5,2011,33000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0
18000,US,CHEVROLET,TRAILBLAZER,2009,67000,BLACK,0,0,1,0,0,1,1,1,0,0,0,0,0
20500,JP,HONDA,ACCORD,2008,168000,CHAMPAIGNE,0,0,0,0,0,1,1,1,0,0,0,0,0
10200,JP,HONDA,CIVIC,2008,88495,SILVER,1,0,0,0,0,1,1,1,0,0,0,0,0
12250,KR,HYUNDAI,SONATA,2008,82000,SILVER BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0
14850,KR,HYUNDAI,AVANTI,2011,39000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
13200,KR,HYUNDAI,TUCSON,2006,102000,GREEN,0,0,1,0,1,1,1,1,0,0,0,0,0
29800,US,HUMMER,H3,2007,75623,,0,0,1,0,0,1,1,1,0,0,0,0,0
65000,US,JEEP,CHEROKEE,2012,12000,SILVER,0,0,1,0,0,1,1,1,0,0,0,0,0
10250,KR,KIA,RIO,2008,43000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
9600,KR,KIA,RIO,2009,48000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
7500,KR,KIA,MORNING,2008,55000,BLUE,0,0,0,0,0,0,1,1,0,0,0,0,0
9000,KR,KIA,PICANTO,2011,17000,,0,0,0,0,0,1,1,1,0,0,0,0,0
28000,UK,LANDROVER,LR2,2007,83000,BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0
37500,DE,MERCEDES,E200,2008,54000,CHAMPAIGNE,0,0,0,0,0,1,1,1,0,0,0,0,0
35000,DE,MERCEDES,E200,2008,82000,BLACK,0,0,0,0,0,1,1,1,0,0,0,0,0
19000,DE,MERCEDES,ML500,2003,106194,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
10200,JP,MITSUBISHI,LANCER,2010,40000,VINOUS,0,0,0,0,1,1,1,1,0,0,0,0,0
12000,JP,NISSAN,X-TRAIL,2003,85000,CHAMPAIGNE,0,0,0,0,0,1,1,1,0,0,0,0,0
16200,JP,NISSAN,ALTIMA,2009,60000,BLACK,1,0,0,0,1,1,1,1,0,0,0,0,0
12000,JP,NISSAN,SUNNY,2010,45000,WHITE,0,0,0,0,0,1,1,1,0,0,0,0,0
8200,DE,OPEL,ASTRA,2001,51000,SILVER BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0
17000,JP,TOYOTA,PRIUS,2010,32000,BLACK,1,0,0,0,0,1,1,1,0,0,0,0,0
16450,JP,TOYOTA,CAMRY,2008,128720,GREEN,0,0,0,0,1,1,1,1,0,0,0,0,0
117500,JP,TOYOTA,CAMRY,2009,74000,SILVER,1,0,0,0,0,1,1,1,0,0,0,0,0
16800,JP,TOYOTA,CAMRY,2009,60000,SILVER,1,0,0,0,1,1,1,1,0,0,0,0,0
13000,JP,TOYOTA,PRIUS,2008,85277,RED,1,0,0,0,0,1,1,1,0,0,0,0,0
26500,DE,V.W,TOUAREG,2008,67000,SILVER,0,0,0,0,0,1,1,1,0,0,0,0,0
18500,DE,V.W,GOLF,2012,15000,BLACK,0,0,0,0,1,1,1,1,0,0,0,0,0
36000,UK,LANDROVER,RANGE ROVER,2004,120000,NAVY BLUE,0,0,0,0,0,1,1,1,0,0,0,0,0
33500,JP,LEXUS,GS450H,2008,37007,WHITE,1,0,0,0,0,1,1,1,0,0,0,0,0
26000,JP,LEXUS,IS300,2007,90000,NAVY GREEN,0,0,0,0,0,1,1,1,0,0,0,0,0
21500,JP,LEXUS,HS250H,2010,26870,VINOUS,1,0,0,0,0,1,1,1,0,0,0,0,0
Binary file added MachineLearning/Session1/Deck.pdf
Binary file not shown.
79 changes: 79 additions & 0 deletions MachineLearning/Session1/regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#This is meant for an interactive Python shell
#Tested on jupyter
#Created by [email protected] for JOSA's Data Science Bootcamp
#
#Walk-through of ordinary least squares regression on the car listings in
#AlWaseet.csv (read from the current working directory): PRICE is regressed
#on MILEAGE first, then on AGE and the chassis flags.
#Bare expressions such as describe(), corr() and summary() are left
#unassigned on purpose so that the interactive shell echoes their value.

import datetime

import matplotlib.pyplot as plt
import pandas
import statsmodels.formula.api as stats

#Load the data
cars = pandas.read_csv("AlWaseet.csv")
#Take a look
cars.describe()

#Visualize
plt.xlabel("Mileage in KM")
plt.ylabel("Price in JOD")
plt.xlim([0,200000])

plt.scatter(cars['MILEAGE'], cars['PRICE'])
plt.show()
#Do they relate?
cars['PRICE'].corr(cars['MILEAGE'])
#FYI: correlation is symmetric, swapping the two columns gives the same value
cars['MILEAGE'].corr(cars['PRICE'])

#Let's regress
formula1 = 'PRICE ~ MILEAGE'
model1 = stats.ols(formula1, data = cars).fit()
model1.summary()

#Remove outlier: drop the row(s) carrying the highest PRICE.
#.copy() makes the filtered frame independent of the one it was sliced from,
#so the column assignments further down no longer raise
#SettingWithCopyWarning.
cars = cars[cars['PRICE'] < cars['PRICE'].max()].copy()
model1v2 = stats.ols(formula1, data = cars).fit()
model1v2.summary()

#Plot again, this time with the fitted line on top of the points
plt.xlabel("Mileage in KM")
plt.ylabel("Price in JOD")
plt.xlim([0,200000])
plt.scatter(cars['MILEAGE'], cars['PRICE'])
plt.plot(cars['MILEAGE'], model1v2.predict(cars))
plt.show()

#### More variables
#AGE is measured from the year the script is run in, not from the year the
#listings were collected. Shifting a regressor by a constant only moves the
#intercept, the slopes and correlations stay the same.
cars['AGE'] = datetime.date.today().year - cars['YEAR']
cars.describe()

plt.xlabel("Age in Years")
plt.ylabel("Price in JOD")
#Keep the original 0-20 range when it is enough, but widen it when the
#computed ages exceed 20 so that the oldest cars are not clipped.
plt.xlim([0, max(20, cars['AGE'].max() + 1)])
plt.scatter(cars['AGE'], cars['PRICE'])
#Show this figure now, otherwise the next scatter is drawn on the same axes
plt.show()

cars['AGE'].corr(cars['PRICE'])

formula2 = 'PRICE ~ MILEAGE + AGE'
model2 = stats.ols(formula2, data = cars).fit()
model2.summary()

#One regressor per chassis flag
formula3 = ('PRICE ~ MILEAGE + AGE + CHASSIS_FR + CHASSIS_FL '
            '+ CHASSIS_RR + CHASSIS_RL')
model3 = stats.ols(formula3, data = cars).fit()
model3.summary()

#Collapse the four chassis flags into a single count per car
cars['CHASSIS'] = (cars['CHASSIS_FR'] + cars['CHASSIS_FL']
                   + cars['CHASSIS_RR'] + cars['CHASSIS_RL'])

#formula3/model3 are deliberately overwritten with the collapsed variant
formula3 = 'PRICE ~ MILEAGE + AGE + CHASSIS'
model3 = stats.ols(formula3, data = cars).fit()
model3.summary()

#Check whether the two predictors are correlated with each other. If they
#are, they carry overlapping information, which is why the last model
#drops MILEAGE.
cars['MILEAGE'].corr(cars['AGE'])
plt.xlabel("Mileage in KM")
plt.ylabel("Age in Years")
plt.xlim([0,200000])
plt.scatter(cars['MILEAGE'],cars['AGE'])
plt.show()

formula4 = 'PRICE ~ AGE + CHASSIS'
model4 = stats.ols(formula4, data=cars).fit()
model4.summary()

0 comments on commit 953400d

Please sign in to comment.