diff --git a/Music Recommendation/Spotify Music Recommendation.ipynb b/Music Recommendation/Spotify Music Recommendation.ipynb new file mode 100644 index 0000000..e3687d2 --- /dev/null +++ b/Music Recommendation/Spotify Music Recommendation.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Music Recommendation System" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import spotipy\n", + "from spotipy.oauth2 import SpotifyClientCredentials\n", + "import spotipy.util as util\n", + "import configparser" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config = configparser.ConfigParser()\n", + "config.read('config.cfg')\n", + "cid = config.get('SPOTIFY', 'CLIENT_ID')\n", + "secret = config.get('SPOTIFY', 'CLIENT_SECRET')\n", + "username = config.get('SPOTIFY', 'USERNAME')\n", + "url = config.get('SPOTIFY', 'URL')\n", + "userid = config.get('SPOTIFY', 'USERID')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Credentials authentication \n", + "scope = 'user-library-read playlist-read-private playlist-modify-public playlist-modify-private user-library-modify'\n", + "token = util.prompt_for_user_token(username, scope, client_id=cid, client_secret=secret, \n", + " redirect_uri=url)\n", + "if token:\n", + " sp = spotipy.Spotify(auth=token)\n", + "else:\n", + " print(\"Can't get token for\", username)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Create a playlist containing songs you disliked, add atleast 1000 songs to it\n", + "def create_data(userid, playlists):\n", + " tracks = {}\n", + " items = []\n", + " c=0\n", + " for playlist in playlists:\n", + " sourcePlaylist = sp.user_playlist(userid, playlist)\n", + " track = sourcePlaylist[\"tracks\"]\n", + " tracks.update(track)\n", + " items = items + track[\"items\"]\n", + " return [tracks, items]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Enter the ids of playlist which only contains the song you liked seprate each id with space\")\n", + "liked = input().strip().split(\" \")\n", + "print(liked)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Get Good songs ids\n", + "[good_tracks, good_songs] = create_data(userid, liked)\n", + "\n", + "while good_tracks['next']:\n", + " good_tracks = sp.next(good_tracks)\n", + " for item in good_tracks[\"items\"]:\n", + " good_songs.append(item)\n", + "good_ids = []\n", + "\n", + "for i in range(len(good_songs)):\n", + " good_ids.append(good_songs[i]['track']['id'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Enter the ids of playlist which only contains the song you disliked seprate each id with space\")\n", + "disliked = input().strip().split(\" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Get bad songs ids\n", + "[bad_tracks, bad_songs] = create_data(userid, disliked)\n", + "while bad_tracks['next']:\n", + " bad_tracks = sp.next(bad_tracks)\n", + " for item in bad_tracks[\"items\"]:\n", + " bad_songs.append(item)\n", + "bad_ids = []\n", + "for i in range(len(bad_songs)):\n", + " bad_ids.append(bad_songs[i]['track']['id'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "features = []\n", + "print(len(good_ids))\n", + "c=0\n", + "for i in range(0,len(good_ids),50):\n", + " audio_features = sp.audio_features(good_ids[i:i+50])\n", + " for track in audio_features:\n", + " if track != None:\n", + " c += 1\n", + " features.append(track)\n", + " features[-1]['target'] = 1\n", + "print(c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(len(bad_ids))\n", + "c = 0\n", + "for i in range(0,len(bad_ids),50):\n", + " audio_features = sp.audio_features(bad_ids[i:i+50])\n", + " for track in audio_features:\n", + " if track != None:\n", + " c += 1\n", + " features.append(track)\n", + " features[-1]['target'] = 0\n", + "print(c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "trainingData = pd.DataFrame(features)\n", + "from sklearn.model_selection import train_test_split\n", + "train, test = train_test_split(trainingData, test_size = 0.10, shuffle=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Graphs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "goods = {}\n", + "bads = {}\n", + "feature = [\"danceability\", \"loudness\", \"valence\", \"energy\", \"instrumentalness\", \"acousticness\", \"key\", \"speechiness\", \"duration_ms\"]\n", + "for fs in feature:\n", + " good = trainingData[trainingData['target'] == 1][fs]\n", + " bad = trainingData[trainingData['target'] == 0][fs]\n", + " goods[fs] = good\n", + " bads[fs] = bad\n", + "for fs in feature:\n", + " fig = plt.figure(figsize=(8,8))\n", + " plt.title(\"Song \"+fs+\" Like / Dislike Distribution\")\n", + " plt.xlabel(fs)\n", + " plt.ylabel(\"No. of songs\")\n", + " goods[fs].hist(alpha=0.7, bins=30, label='positive')\n", + " bads[fs].hist(alpha=0.7, bins=30, label='negative')\n", + " plt.legend(loc='upper right')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and Testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Define the set of features that we want to look at\n", + "feature = [\"danceability\", \"loudness\", \"valence\", \"energy\", \"instrumentalness\", \"acousticness\", \"key\", \"speechiness\", \"duration_ms\"]\n", + "#Split the data into x and y test and train sets to feed them into a bunch of classifiers!\n", + "x_train = train[feature]\n", + "y_train = train[\"target\"]\n", + "x_test = test[feature]\n", + "y_test = test[\"target\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scores = {}\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "dtc = DecisionTreeClassifier(min_samples_split=100)\n", + "dtc.fit(x_train, y_train)\n", + "y_pred = dtc.predict(x_test)\n", + "score = accuracy_score(y_test, y_pred) * 100\n", + "scores['dtc'] = score\n", + "print(\"Accuracy using Decision Tree: \", round(score, 1), \"%\")\n", + "\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "knn = KNeighborsClassifier(9)\n", + "knn.fit(x_train, y_train)\n", + "knn_pred = knn.predict(x_test)\n", + "score = accuracy_score(y_test, knn_pred) * 100\n", + "scores['knn'] = score\n", + "print(\"Accuracy using Knn: \", round(score, 1), \"%\")\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "rfc = RandomForestClassifier(n_estimators=31, random_state=111)\n", + "rfc.fit(x_train, y_train)\n", + "rfc_pred = knn.predict(x_test)\n", + "score = accuracy_score(y_test, rfc_pred) * 100\n", + "scores['rfc'] = score\n", + "print(\"Accuracy using Random Forest classifier: \", round(score, 1), \"%\")\n", + "\n", + "from sklearn.ensemble import AdaBoostClassifier\n", + "abc = AdaBoostClassifier(n_estimators=62, random_state=111)\n", + "abc.fit(x_train, y_train)\n", + "abc_pred = knn.predict(x_test)\n", + "score = accuracy_score(y_test, abc_pred) * 100\n", + "print(\"Accuracy using AdaBoost Classifier: \", round(score, 1), \"%\")\n", + "\n", + "\n", + "from sklearn.ensemble import BaggingClassifier\n", + "bc = BaggingClassifier(n_estimators=9, random_state=111)\n", + "bc.fit(x_train, y_train)\n", + "bc_pred = knn.predict(x_test)\n", + "score = accuracy_score(y_test, bc_pred) * 100\n", + "scores['bc'] = score\n", + "print(\"Accuracy using Bagging Classifier: \", round(score, 1), \"%\")\n", + "\n", + "from sklearn.ensemble import ExtraTreesClassifier\n", + "etc = ExtraTreesClassifier(n_estimators=9, random_state=111)\n", + "etc.fit(x_train, y_train)\n", + "etc_pred = knn.predict(x_test)\n", + "score = accuracy_score(y_test, etc_pred) * 100\n", + "scores['etc'] = score\n", + "print(\"Accuracy using Bagging Classifier: \", round(score, 1), \"%\")\n", + "\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=.1, max_depth=1, random_state=0)\n", + "gbc.fit(x_train, y_train)\n", + "predicted = gbc.predict(x_test)\n", + "score = accuracy_score(y_test, predicted)*100\n", + "scores['gbc'] = score\n", + "print(\"Accuracy using Gbc: \", round(score, 1), \"%\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.xlabel('Classifier')\n", + "plt.ylabel('Accuracy')\n", + "plt.title('Recommendation Accuracy using different classifier')\n", + "plt.bar(range(len(scores)), list(scores.values()), align='center')\n", + "plt.xticks(range(len(scores)), list(scores.keys()))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Adding Recommended songs to playlist" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "print(\"Enter the ids of playlist from which you want recommended songs\")\n", + "like = input().strip().split(\" \")\n", + "title = input(\"Title of Playlist that will contain songs you liked:: \")\n", + "songs = []\n", + "for playlist in like:\n", + " sourcePlaylist = sp.user_playlist(userid, playlist)\n", + " tracks = sourcePlaylist[\"tracks\"]\n", + " song = tracks[\"items\"]\n", + " while tracks['next']:\n", + " tracks = sp.next(tracks)\n", + " for item in tracks[\"items\"]:\n", + " song.append(item)\n", + " ids = [] \n", + " print(len(song))\n", + " print(song[0]['track']['id'])\n", + " i = 0\n", + " for i in range(len(song)):\n", + " songs.append(song[i][\"track\"][\"id\"])\n", + " \n", + "print(len(songs))\n", + "c = 0\n", + "features = []\n", + "for i in range(0,len(songs),50):\n", + " audio_features = sp.audio_features(songs[i:i+50])\n", + " for track in audio_features:\n", + " if track != None:\n", + " c += 1\n", + " features.append(track)\n", + "print(c)\n", + "\n", + "dataset = pd.DataFrame(features)\n", + "test = dataset[feature]\n", + "predicted = gbc.predict(test)\n", + "rec_songs = []\n", + "for i in range(len(predicted)):\n", + " if predicted[i] == 1:\n", + " rec_songs.append(songs[i])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "playlists = sp.user_playlists(userid)['items']\n", + "exist = False\n", + "for playlist in playlists:\n", + " if playlist['name'] == title:\n", + " liked_list = playlist\n", + " exist = True\n", + " break\n", + "if not exist:\n", + " liked_list = sp.user_playlist_create(userid, title, True)\n", + "for song in rec_songs:\n", + " sp.user_playlist_add_tracks(userid, liked_list['id'], [song])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Music Recommendation/config.cfg b/Music Recommendation/config.cfg new file mode 100644 index 0000000..ce5476e --- /dev/null +++ b/Music Recommendation/config.cfg @@ -0,0 +1,6 @@ +[SPOTIFY] +CLIENT_ID= +CLIENT_SECRET= +USERNAME= +URL=http://localhost:8888/tree +USERID=