-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFor_Testing_Only.py
67 lines (59 loc) · 2.4 KB
/
For_Testing_Only.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from Extract_Polarties import extract_Weighted_Product_Polarities
#extract_Weighted_Product_Polarities()
#Airline Data Example
#Calculating Skewness Statistic For Flight Time Using Python
#You can download dataset from http://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236
import pandas as pd
import matplotlib.pylab as plt
from sklearn import preprocessing
from scipy.stats import skew
from scipy.stats import boxcox
import numpy as np
#First we import the data
#data = pd.read_csv('C:/Users/Yassien\Downloads/On_Time_On_Time_Performance_2015_1/On_Time_On_Time_Performance_2015_1.csv')
#Replace Missing Values with zero
#data['AirTime'].fillna(0,inplace=True)
'''
#The next line uses scale method from scikit-learn to transform the distribution
#This will not impact Skewness Statistic calculation
#We have included this for sake of completion
#Note that we changed the following line to process the square roots instead of actuals
print(type(data['AirTime']))
AirTime = preprocessing.scale(np.sqrt(data['AirTime']))
print(type(AirTime))
#Note that we shift the values by 1 to get rid of zeros
AirTimeBoxCox = preprocessing.scale(boxcox(data['AirTime']+1)[0])
AirTimeOrig = preprocessing.scale(data['AirTime'])
#Next we calculate skewness using skew from scipy.stats
skness = skew(AirTime)
print("sqrt sk "+str(skness))
sknessBoxCox = skew(AirTimeBoxCox)
print("boxcox sk "+str(sknessBoxCox))
sknessOrig = skew(AirTimeOrig)
print("orig sk "+str(sknessOrig))
#We draw the histograms
figure = plt.figure()
figure.add_subplot(131)
plt.hist(AirTime,facecolor='red',alpha=0.75)
plt.xlabel("AirTime - Transformed(Using Sqrt)")
plt.title("Transformed AirTime Histogram")
plt.text(2,100000,"Skewness: {0:.2f}".format(skness))
figure.add_subplot(132)
plt.hist(AirTimeBoxCox,facecolor='blue',alpha=0.75)
plt.xlabel("AirTime - Using BoxCox Transformation")
plt.title("AirTime Histogram - Un-Skewed(BoxCox)")
plt.text(2,100000,"Skewness: {0:.2f}".format(sknessBoxCox))
figure.add_subplot(133)
plt.hist(AirTimeOrig,facecolor='green',alpha=0.75)
plt.xlabel("AirTime - Based on Original Flight Times")
plt.title("AirTime Histogram - Right Skewed")
plt.text(2,100000,"Skewness: {0:.2f}".format(sknessOrig))
plt.show()
'''
from Resolve_Skewness import resolve_Skewness_For_Feature
#resolve_Skewness_For_Feature(data['AirTime'],"AirTime")
import math
import numpy as np
# Quick sanity check: take element-wise square roots of a few large
# integer magnitudes and print the resulting array.
orig_feature = [
    10 ** 6,
    10 ** 8,
    1001 * 10 ** 8,
]
# Convert to an ndarray explicitly before applying the ufunc.
val = np.sqrt(np.asarray(orig_feature))
print(val)