Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Yu-Hua Chen authored and Yu-Hua Chen committed May 30, 2019
1 parent 26f4535 commit a8e3536
Show file tree
Hide file tree
Showing 14 changed files with 1,881 additions and 0 deletions.
298 changes: 298 additions & 0 deletions Inference.ipynb

Large diffs are not rendered by default.

176 changes: 176 additions & 0 deletions Make Training Data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.utils.data as data\n",
"import torchvision.transforms as transforms\n",
"import numpy as np\n",
"from pytorch_nsynth_lib.nsynth import NSynth\n",
"from IPython.display import Audio\n",
"\n",
"import librosa\n",
"import librosa.display\n",
"import phase_operation\n",
"from tqdm import tqdm\n",
"import h5py"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import spec_ops as spec_ops\n",
"import phase_operation as phase_op\n",
"import spectrograms_helper as spec_helper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"train_data = h5py.File('../data/Nsynth_melspec_IF_pitch.hdf5', 'w')\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# audio samples are loaded as an int16 numpy array\n",
"# rescale intensity range as float [-1, 1]\n",
"toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max)\n",
"# use instrument_family and instrument_source as classification targets\n",
"dataset = NSynth(\n",
" \"../data/nsynth/nsynth-train\",\n",
" transform=toFloat,\n",
" blacklist_pattern=[ \"string\"], # blacklist string instrument\n",
" categorical_field_list=[\"instrument_family\",\"pitch\"])\n",
"loader = data.DataLoader(dataset, batch_size=1, shuffle=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def expand(mat):\n",
" expand_vec = np.expand_dims(mat[:,125],axis=1)\n",
" expanded = np.hstack((mat,expand_vec,expand_vec))\n",
" return expanded"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spec_list=[]\n",
"pitch_list=[]\n",
"IF_list =[]\n",
"mel_spec_list=[]\n",
"mel_IF_list=[]\n",
"\n",
"pitch_set =set()\n",
"count=0\n",
"for samples, instrument_family, pitch, targets in loader:\n",
" \n",
" pitch = targets['pitch'].data.numpy()[0]\n",
"\n",
" if pitch < 24 or pitch > 84:\n",
"# print(\"pitch\",pitch)\n",
" continue\n",
" \n",
" sample = samples.data.numpy().squeeze()\n",
" spec = librosa.stft(sample, n_fft=2048, hop_length = 512)\n",
" \n",
" magnitude = np.log(np.abs(spec)+ 1.0e-6)[:1024]\n",
"# print(\"magnitude Max\",magnitude.max(),\"magnitude Min\",magnitude.min())\n",
" angle =np.angle(spec)\n",
"# print(\"angle Max\",angle.max(),\"angle Min\",angle.min())\n",
"\n",
" IF = phase_operation.instantaneous_frequency(angle,time_axis=1)[:1024]\n",
" \n",
" magnitude = expand(magnitude)\n",
" IF = expand(IF)\n",
" logmelmag2, mel_p = spec_helper.specgrams_to_melspecgrams(magnitude, IF)\n",
"\n",
"# pitch = targets['pitch'].data.numpy()[0]\n",
" \n",
" \n",
" assert magnitude.shape ==(1024, 128)\n",
" assert IF.shape ==(1024, 128)\n",
" \n",
"# spec_list.append(magnitude)\n",
"# IF_list.append(IF)\n",
" pitch_list.append(pitch)\n",
" mel_spec_list.append(logmelmag2)\n",
" mel_IF_list.append(mel_p)\n",
" pitch_set.add(pitch)\n",
" \n",
" count+=1\n",
" if count%10000==0:\n",
" print(count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# train_data.create_dataset(\"Spec\", data=spec_list)\n",
"# train_data.create_dataset(\"IF\", data=IF_list)\n",
"train_data.create_dataset(\"pitch\", data=pitch_list)\n",
"train_data.create_dataset(\"mel_Spec\", data=mel_spec_list)\n",
"train_data.create_dataset(\"mel_IF\", data=mel_IF_list)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit a8e3536

Please sign in to comment.