1
+ import pandas as pd
2
+ import matplotlib .pyplot as plt
3
+ import seaborn as sns
4
+ from sklearn .preprocessing import LabelEncoder
5
+ from kmodes .kmodes import KModes
6
+ from kmodes .kprototypes import KPrototypes
7
+ import pickle
8
+ from pathlib import Path
9
+ import matplotlib .pyplot as plt
10
+ import seaborn as sns
11
+ from sklearn .preprocessing import StandardScaler
12
+
13
class Cust_segment:
    """Customer-segmentation workflow: load the data, explore it, prepare features.

    The steps are meant to be called in order: ``read_data`` loads the raw
    table, the two ``eksplorasi_*`` methods plot it, and ``preparation_data``
    builds the numeric feature table.
    """

    # Column groups shared by the exploration and preparation steps.
    KOLOM_NUMERIK = ('Umur', 'NilaiBelanjaSetahun')
    KOLOM_KATEGORIKAL = ('Jenis Kelamin', 'Profesi', 'Tipe Residen')

    def __init__(self, source=None):
        """Remember where the raw data lives.

        Args:
            source: Path or URL accepted by ``pandas.read_csv``. When omitted,
                the module-level ``data`` variable is used, which is what the
                original no-argument constructor relied on (a ``NameError`` is
                raised if that variable does not exist).
        """
        # `data` below is the module-level global; it is only looked up when
        # no explicit source is given.
        self.data = data if source is None else source

    def read_data(self):
        """Load the data set from ``self.data`` and print a short overview.

        Returns:
            pandas.DataFrame: the parsed table.
        """
        # The separator is a single tab; a trailing space in the separator
        # would turn it into a two-character pattern that never matches a
        # plain tab-separated file.
        df = pd.read_csv(self.data, sep="\t")

        # Show the first rows.
        print(df.head())

        # DataFrame.info() prints its report itself and returns None, so it is
        # not wrapped in print().
        df.info()

        return df

    def eksplorasi_dataNumerik(self, df, kolom_numerik=None):
        """Plot a box plot and a distribution plot for each numeric column.

        Args:
            df: DataFrame containing the columns to plot.
            kolom_numerik: Optional iterable of column names; defaults to
                ``KOLOM_NUMERIK``.
        """
        kolom = list(self.KOLOM_NUMERIK if kolom_numerik is None else kolom_numerik)
        sns.set(style='white')

        # One row per column; squeeze=False keeps `axs` two-dimensional even
        # for a single column. Two columns give the original 10x9 canvas.
        _, axs = plt.subplots(len(kolom), 2, figsize=(10, 4.5 * len(kolom)), squeeze=False)
        for i, kol in enumerate(kolom):
            seri = df[kol]
            # Pass the series as `x` explicitly: newer seaborn releases treat
            # the first positional argument as `data`.
            sns.boxplot(x=seri, ax=axs[i][0])
            # NOTE(review): distplot is deprecated in recent seaborn releases;
            # kept because the file uses no newer seaborn API - confirm the
            # installed version before migrating to histplot.
            sns.distplot(seri, ax=axs[i][1])
            axs[i][0].set_title('mean = %.2f\n median = %.2f\n std = %.2f' % (seri.mean(), seri.median(), seri.std()))
        plt.tight_layout()
        plt.show()

    def eksplorasi_dataKategorikal(self, df, kolom_kategorikal=None):
        """Plot an annotated count plot for each categorical column.

        Args:
            df: DataFrame containing the columns to plot.
            kolom_kategorikal: Optional iterable of column names; defaults to
                ``KOLOM_KATEGORIKAL``.
        """
        kolom = list(self.KOLOM_KATEGORIKAL if kolom_kategorikal is None else kolom_kategorikal)
        sns.set(style='white')

        # One stacked panel per column; three columns give the original 7x10
        # canvas. squeeze=False keeps `axs` indexable for a single column.
        _, axs = plt.subplots(len(kolom), 1, figsize=(7, 10 * len(kolom) / 3), squeeze=False)

        for ax, kol in zip(axs[:, 0], kolom):
            # Bars ordered by descending frequency.
            sns.countplot(x=df[kol], order=df[kol].value_counts().index, ax=ax)
            ax.set_title('\n Count Plot %s\n ' % (kol), fontsize=15)

            # Write each bar's count just above it.
            for p in ax.patches:
                ax.annotate(format(p.get_height(), '.0f'),
                            (p.get_x() + p.get_width() / 2., p.get_height()),
                            ha='center',
                            va='center',
                            xytext=(0, 10),
                            textcoords='offset points')

            # The counts are annotated, so the y axis is hidden.
            ax.yaxis.set_visible(False)

        # Figure-wide settings are applied once, after all panels exist.
        sns.despine(right=True, top=True, left=True)
        plt.tight_layout()

        # Show the plot.
        plt.show()

    def preparation_data(self, df, kolom_numerik=None, kolom_kategorikal=None):
        """Build the modelling table: scaled numeric plus label-encoded categorical columns.

        Prints summary statistics before and after scaling, and the head of
        each intermediate table.

        Args:
            df: Raw DataFrame containing both column groups.
            kolom_numerik: Optional iterable of numeric column names; defaults
                to ``KOLOM_NUMERIK``.
            kolom_kategorikal: Optional iterable of categorical column names;
                defaults to ``KOLOM_KATEGORIKAL``.

        Returns:
            pandas.DataFrame: encoded categorical columns followed by the
            standardised numeric columns, aligned on ``df``'s index.
        """
        kolom_numerik = list(self.KOLOM_NUMERIK if kolom_numerik is None else kolom_numerik)
        kolom_kategorikal = list(self.KOLOM_KATEGORIKAL if kolom_kategorikal is None else kolom_kategorikal)

        # Statistics before standardisation.
        print('Statistik Sebelum Standardisasi\n ')
        print(df[kolom_numerik].describe().round(1))

        # Standardise the numeric columns and wrap the resulting array back
        # into a DataFrame that shares the input's index.
        df_std = pd.DataFrame(
            data=StandardScaler().fit_transform(df[kolom_numerik]),
            index=df.index,
            columns=kolom_numerik,
        )

        # Sample rows and summary statistics after standardisation.
        print('Contoh hasil standardisasi\n ')
        print(df_std.head())

        print('Statistik hasil standardisasi\n ')
        print(df_std.describe().round(0))

        # Label-encode every categorical column on a copy, leaving `df` intact.
        df_encode = df[kolom_kategorikal].copy()
        for col in kolom_kategorikal:
            df_encode[col] = LabelEncoder().fit_transform(df_encode[col])

        print(df_encode.head())

        # Combine both parts on the shared index.
        df_model = df_encode.merge(df_std, left_index=True, right_index=True, how='left')
        print(df_model.head())

        # Hand the prepared table back so callers can actually use it.
        return df_model
121
+
122
+
123
# Remote location of the customer data set. Cust_segment reads this
# module-level name, so it has to be defined before the instance is created.
data = "https://dqlab-dataset.s3-ap-southeast-1.amazonaws.com/customer_segments.txt"

# Run the workflow end to end: load, explore numeric and categorical columns,
# then prepare the features. `final_data` holds whatever preparation_data returns.
app = Cust_segment()
data_raw = app.read_data()
app.eksplorasi_dataNumerik(data_raw)
app.eksplorasi_dataKategorikal(data_raw)
final_data = app.preparation_data(data_raw)
0 commit comments