-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmergefeatures.py
68 lines (43 loc) · 2.21 KB
/
mergefeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python
# encoding: utf8
"""
Created on Fri Nov 16 18:56:13 2018
@author: thileepan
This code reads 3 features sets mentioned below and combines them together.
1. Librosa features = 65
2. Amplitude modulation features = 3
3. Sound Pressure levels (A, C and no weighting ) = 3
Total number of features = 71
The final HDF5 file is stored in this location in windows computer
C:\\Users\\tpaulraj\\Projects\\Clustering\\features
Size of the file is about 1.7 GB and dimension is [3011874, 71]
"""
import pandas as pd
import glob
import os
# Read Amplitude Modulation data
os.chdir('C:/Users/tpaulraj/Projects/Clustering/Amplitude_Modulation/10sec_data/') #windows
am_files_list = glob.glob('Amplitude_modulation_dept_10sec_Auris3_*')
AM_full_year = pd.DataFrame()
for file in am_files_list:
temp_am_file = pd.read_csv(file, index_col=[3], parse_dates=[3])
AM_full_year = AM_full_year.append(temp_am_file)
#Read Librosa Data
os.chdir('C:/Users/tpaulraj/Projects/Clustering/Results')
month_list_2016 = ['April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] #creating month list for 2016
month_list_2017 = ['January', 'February', 'March', 'April', 'May'] #creating month list for 2017
pattern_list_2016 = ['{}_2016_feature_data'.format(month) for month in month_list_2016] #creating pattern lists for 2016
pattern_list_2017 = ['{}_2017_feature_data'.format(month) for month in month_list_2017] #creating pattern lists for 2017
pattern_list = pattern_list_2016 + pattern_list_2017 #combining both pattern lists to create a single list
Librosa_data = pd.DataFrame()
for pattern in pattern_list:
print(pattern)
temp_librosa_file = pd.read_csv(pattern, parse_dates=[0], index_col=[0])
Librosa_data = Librosa_data.append(temp_librosa_file)
#Read LA and LC data
os.chdir('C:/Users/tpaulraj/Projects/Clustering/Petri_features')
Sound_Pressure_data = pd.read_hdf('Auris3_LALC_full_year.hdf5')
Sound_Pressure_data = Sound_Pressure_data.iloc[:,1:4] #not taking the AM data in this file
#Combining all features together
os.chdir('C:/Users/tpaulraj/Projects/Clustering/features')
all_features_full_year = pd.concat([Librosa_data, AM_full_year, Sound_Pressure_data], axis=1, join='inner')