-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Yu-Hua Chen
authored and
Yu-Hua Chen
committed
May 30, 2019
1 parent
26f4535
commit a8e3536
Showing
14 changed files
with
1,881 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import torch\n", | ||
"import torch.utils.data as data\n", | ||
"import torchvision.transforms as transforms\n", | ||
"import numpy as np\n", | ||
"from pytorch_nsynth_lib.nsynth import NSynth\n", | ||
"from IPython.display import Audio\n", | ||
"\n", | ||
"import librosa\n", | ||
"import librosa.display\n", | ||
"import phase_operation\n", | ||
"from tqdm import tqdm\n", | ||
"import h5py" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import spec_ops as spec_ops\n", | ||
"import phase_operation as phase_op\n", | ||
"import spectrograms_helper as spec_helper" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Output container for the extracted features. Mode 'w' creates the file\n", | ||
"# and truncates any file that already exists at this path.\n", | ||
"hdf5_path = '../data/Nsynth_melspec_IF_pitch.hdf5'\n", | ||
"train_data = h5py.File(hdf5_path, 'w')\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Clips are loaded as int16 numpy arrays; dividing by the int16 maximum\n", | ||
"# rescales them to floats in approximately [-1, 1].\n", | ||
"int16_peak = np.iinfo(np.int16).max\n", | ||
"toFloat = transforms.Lambda(lambda clip: clip / int16_peak)\n", | ||
"\n", | ||
"# Training split with string instruments blacklisted; instrument_family and\n", | ||
"# pitch are requested as the categorical fields.\n", | ||
"nsynth_options = dict(\n", | ||
"    transform=toFloat,\n", | ||
"    blacklist_pattern=[\"string\"],\n", | ||
"    categorical_field_list=[\"instrument_family\", \"pitch\"],\n", | ||
")\n", | ||
"dataset = NSynth(\"../data/nsynth/nsynth-train\", **nsynth_options)\n", | ||
"\n", | ||
"# One clip per batch, visited in shuffled order.\n", | ||
"loader = data.DataLoader(dataset, shuffle=True, batch_size=1)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def expand(mat, n_extra=2):\n", | ||
"    \"\"\"Append copies of the last column of ``mat`` along axis 1.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        mat: array with at least two dimensions and at least one column.\n", | ||
"        n_extra: number of copies of the last column to append (default 2).\n", | ||
"\n", | ||
"    Returns:\n", | ||
"        A new array whose axis 1 is ``n_extra`` longer than that of ``mat``.\n", | ||
"\n", | ||
"    Raises:\n", | ||
"        IndexError: if ``mat`` has no columns to repeat.\n", | ||
"    \"\"\"\n", | ||
"    mat = np.asarray(mat)\n", | ||
"    if mat.shape[1] == 0:\n", | ||
"        raise IndexError(\"expand() needs at least one column to repeat\")\n", | ||
"    # Repeat the final column instead of a fixed index (previously 125), so the\n", | ||
"    # padding always comes from the end of the array whatever its width.\n", | ||
"    tail = np.repeat(mat[:, -1:], n_extra, axis=1)\n", | ||
"    return np.concatenate((mat, tail), axis=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Accumulators for the per-clip features. spec_list and IF_list stay empty\n", | ||
"# unless the commented-out appends below are re-enabled.\n", | ||
"spec_list = []\n", | ||
"pitch_list = []\n", | ||
"IF_list = []\n", | ||
"mel_spec_list = []\n", | ||
"mel_IF_list = []\n", | ||
"\n", | ||
"pitch_set = set()\n", | ||
"count = 0\n", | ||
"for samples, instrument_family, pitch, targets in loader:\n", | ||
"    # The value stored in `targets` replaces the unpacked `pitch`\n", | ||
"    # (presumably the raw pitch rather than an encoded label - TODO confirm).\n", | ||
"    pitch = targets['pitch'].data.numpy()[0]\n", | ||
"\n", | ||
"    # Only pitches in 24..84 (inclusive) are kept.\n", | ||
"    if pitch < 24 or pitch > 84:\n", | ||
"        continue\n", | ||
"\n", | ||
"    sample = samples.data.numpy().squeeze()\n", | ||
"    spec = librosa.stft(sample, n_fft=2048, hop_length=512)\n", | ||
"\n", | ||
"    # Log-magnitude (the small offset avoids log(0)); keep the first 1024 rows.\n", | ||
"    magnitude = np.log(np.abs(spec) + 1.0e-6)[:1024]\n", | ||
"    angle = np.angle(spec)\n", | ||
"    IF = phase_operation.instantaneous_frequency(angle, time_axis=1)[:1024]\n", | ||
"\n", | ||
"    # expand() appends two columns along axis 1.\n", | ||
"    magnitude = expand(magnitude)\n", | ||
"    IF = expand(IF)\n", | ||
"\n", | ||
"    # Check the shapes before the mel conversion so a malformed clip is\n", | ||
"    # rejected here instead of being passed on to the helper.\n", | ||
"    assert magnitude.shape == (1024, 128), magnitude.shape\n", | ||
"    assert IF.shape == (1024, 128), IF.shape\n", | ||
"\n", | ||
"    logmelmag2, mel_p = spec_helper.specgrams_to_melspecgrams(magnitude, IF)\n", | ||
"\n", | ||
"    # spec_list.append(magnitude)\n", | ||
"    # IF_list.append(IF)\n", | ||
"    pitch_list.append(pitch)\n", | ||
"    mel_spec_list.append(logmelmag2)\n", | ||
"    mel_IF_list.append(mel_p)\n", | ||
"    pitch_set.add(pitch)\n", | ||
"\n", | ||
"    # Progress report every 10000 kept clips.\n", | ||
"    count += 1\n", | ||
"    if count % 10000 == 0:\n", | ||
"        print(count)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Write the accumulated features into the open HDF5 file, then close it.\n", | ||
"# train_data.create_dataset(\"Spec\", data=spec_list)\n", | ||
"# train_data.create_dataset(\"IF\", data=IF_list)\n", | ||
"try:\n", | ||
"    train_data.create_dataset(\"pitch\", data=pitch_list)\n", | ||
"    train_data.create_dataset(\"mel_Spec\", data=mel_spec_list)\n", | ||
"    train_data.create_dataset(\"mel_IF\", data=mel_IF_list)\n", | ||
"finally:\n", | ||
"    # Close the handle even if a write fails, so buffered data is flushed and\n", | ||
"    # the file is not left open for the rest of the kernel session.\n", | ||
"    train_data.close()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.