-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathbuildPlaylistSongMatrix.py
75 lines (60 loc) · 2.49 KB
/
buildPlaylistSongMatrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 20 22:57:56 2018
@author: bking
"""
import pandas as pd
#import argparse
import sys
from helper import alertError,alertFinishJob
import gc
import time
def _build_playlist_song_matrix(df):
    """Collapse a track-level frame into one row per playlist.

    Groups *df* by its ``pid`` column and gathers the ``tid`` and ``pos``
    values of every group into Python lists.

    Args:
        df: DataFrame that has ``pid``, ``tid`` and ``pos`` columns.

    Returns:
        A DataFrame indexed by ``pid`` with two columns, ``tid`` and ``pos``,
        where each cell holds the list of values for that playlist.
    """
    # Group once and reuse the grouping for both columns instead of
    # re-grouping the whole frame for each of them.
    by_playlist = df.groupby(by='pid')
    tid = by_playlist['tid'].apply(list)
    pos = by_playlist['pos'].apply(list)
    return pd.concat([tid, pos], axis=1)


def main(argv):
    """Build and save the playlist-song matrix of every data split.

    Reads the train, incomplete-test, test-truth and complete track frames
    from ``data/df_data``, turns each one into a per-playlist matrix (see
    ``_build_playlist_song_matrix``) and writes the results as HDF files
    under ``data/df_data/df_playlistSong`` with the key ``'abc'``.

    Args:
        argv: Command-line arguments. Accepted so the entry point can pass
            ``sys.argv``, but currently not used.
    """
    import os  # local import so this block does not depend on file-level edits

    # (progress message, input file, output file) per split, in processing
    # order. NOTE: the "_new" variants of the train/test inputs are the ones
    # in use.
    jobs = (
        ("Build playlist-song matrix for train set",
         'data/df_data/df_train_new.hdf',
         "data/df_data/df_playlistSong/df_ps_train_new.hdf"),
        ("Build playlist-song matrix for test set incomplete",
         'data/df_data/df_test_new.hdf',
         "data/df_data/df_playlistSong/df_ps_test_new.hdf"),
        ("Build playlist-song matrix for test set truth",
         'data/df_data/df_test_truth_new.hdf',
         "data/df_data/df_playlistSong/df_ps_test_truth_new.hdf"),
        ("Build Complate playlist-song matrix",
         'data/df_data/df_tracks.hdf',
         "data/df_data/df_playlistSong/df_ps_complete_new.hdf"),
    )

    # Make sure the output location exists before doing any expensive work,
    # so a missing directory does not surface only at save time.
    for _, _, destination in jobs:
        os.makedirs(os.path.dirname(destination), exist_ok=True)

    print("Reading Data")
    frames = [pd.read_hdf(source) for _, source, _ in jobs]

    # Build playlist-song matrix
    matrices = []
    for index, (message, _, _) in enumerate(jobs):
        print(message)
        matrices.append(_build_playlist_song_matrix(frames[index]))
        # Drop the input as soon as it has been consumed so it does not stay
        # alive while the remaining splits are processed and saved.
        frames[index] = None

    print("save matrix Playlist-Songs")
    for (_, _, destination), matrix in zip(jobs, matrices):
        matrix.to_hdf(destination, key='abc')
if __name__ == "__main__":
    # Script entry point: run the whole pipeline once and report how long it
    # took in seconds.
    # NOTE: argument parsing and the alertFinishJob/alertError notification
    # wrapper around main() are currently disabled.
    started_at = time.time()
    main(sys.argv)
    elapsed = time.time() - started_at
    print("Time taken = {0:.5f}".format(elapsed))