diff --git a/Sambert_Voice_Cloning_in_One_Click.ipynb b/Sambert_Voice_Cloning_in_One_Click.ipynb
new file mode 100644
index 0000000..ad82bec
--- /dev/null
+++ b/Sambert_Voice_Cloning_in_One_Click.ipynb
@@ -0,0 +1,2858 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "machine_shape": "hm",
+ "gpuType": "V100",
+ "authorship_tag": "ABX9TyPBOWBw9vF2Wuw2s1/2u4qn",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# 全新中文声音克隆 Voice Cloning for Chinese Speech"
+ ],
+ "metadata": {
+ "id": "Uhhc4_stcdSf"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 配置环境 Set up"
+ ],
+ "metadata": {
+ "id": "qIFF53SWVDe-"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "! nvidia-smi # 需要使用GPU运行 (a GPU runtime is required)"
+ ],
+ "metadata": {
+ "id": "4RZJ1P69VKLU",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "4bf174fb-3131-47ce-a5d5-def6caba4ad4"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Sat Sep 9 07:28:21 2023 \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |\n",
+ "|-------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|===============================+======================+======================|\n",
+ "| 0 Tesla V100-SXM2... Off | 00000000:00:04.0 Off | 0 |\n",
+ "| N/A 37C P0 25W / 300W | 0MiB / 16384MiB | 0% Default |\n",
+ "| | | N/A |\n",
+ "+-------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=============================================================================|\n",
+ "| No running processes found |\n",
+ "+-----------------------------------------------------------------------------+\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "! [ -d Personal-TTS-v2 ] || git clone https://huggingface.co/spaces/kevinwang676/Personal-TTS-v2 # skip the clone when the checkout already exists, so re-running the cell does not fail"
+ ],
+ "metadata": {
+ "id": "e_OKqHl1gwlJ",
+ "outputId": "3dea2e2b-1e1c-4530-e6ee-432cf1db1b12",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "fatal: destination path 'Personal-TTS-v2' already exists and is not an empty directory.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%cd Personal-TTS-v2"
+ ],
+ "metadata": {
+ "id": "6vcoOVAag0eP",
+ "outputId": "0eff3546-030f-4755-eb47-921c01bf20bb",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/Personal-TTS-v2\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "! pip install openai-whisper\n",
+ "! pip install modelscope\n",
+ "! pip install tts-autolabel -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html # -f: also look for packages on the ModelScope release page\n",
+ "! pip install typeguard==2.3.1\n",
+ "! pip install sox\n",
+ "! pip install bitstring\n",
+ "! pip install pysptk --no-build-isolation # build against the packages already installed in this environment\n",
+ "! pip install kantts -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html # -f: also look for packages on the ModelScope release page\n",
+ "! pip install pytorch_wavelets\n",
+ "! pip install tensorboardX\n",
+ "! [ -d pytorch_wavelets ] || git clone https://github.com/fbcotter/pytorch_wavelets # skip the clone when the checkout already exists\n",
+ "! pip install matplotlib\n",
+ "! pip install numpy==1.22.0 # pinned last: earlier installs in this cell can replace numpy with a newer version\n",
+ "! apt-get install -y sox # -y: answer the confirmation prompt automatically instead of waiting for input"
+ ],
+ "metadata": {
+ "id": "s2aAbOEPaVh6",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "outputId": "7b7c59de-cc9b-4529-e718-97a89c902713"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: openai-whisper in /usr/local/lib/python3.10/dist-packages (20230314)\n",
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.0.0)\n",
+ "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.56.4)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (1.22.0)\n",
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.0.1+cu118)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (4.66.1)\n",
+ "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (10.1.0)\n",
+ "Requirement already satisfied: tiktoken==0.3.1 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.3.1)\n",
+ "Requirement already satisfied: ffmpeg-python==0.2.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.2.0)\n",
+ "Requirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from ffmpeg-python==0.2.0->openai-whisper) (0.18.3)\n",
+ "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken==0.3.1->openai-whisper) (2023.6.3)\n",
+ "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken==0.3.1->openai-whisper) (2.31.0)\n",
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (3.27.4.1)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (3.12.2)\n",
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (16.0.6)\n",
+ "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (0.39.1)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (67.7.2)\n",
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (4.5.0)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (1.12)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1.2)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (3.2.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (3.4)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (2.0.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (2023.7.22)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->openai-whisper) (2.1.3)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->openai-whisper) (1.3.0)\n",
+ "Requirement already satisfied: modelscope in /usr/local/lib/python3.10/dist-packages (1.9.0)\n",
+ "Requirement already satisfied: addict in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.4.0)\n",
+ "Requirement already satisfied: attrs in /usr/local/lib/python3.10/dist-packages (from modelscope) (23.1.0)\n",
+ "Requirement already satisfied: datasets<=2.13.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.13.0)\n",
+ "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from modelscope) (0.6.1)\n",
+ "Requirement already satisfied: filelock>=3.3.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (3.12.2)\n",
+ "Requirement already satisfied: gast>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from modelscope) (0.4.0)\n",
+ "Requirement already satisfied: ms-swift in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.0.0)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.22.0)\n",
+ "Requirement already satisfied: oss2 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.18.1)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.5.3)\n",
+ "Requirement already satisfied: Pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (9.4.0)\n",
+ "Requirement already satisfied: pyarrow!=9.0.0,>=6.0.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (13.0.0)\n",
+ "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.8.2)\n",
+ "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from modelscope) (6.0.1)\n",
+ "Requirement already satisfied: requests>=2.25 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.31.0)\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.10.1)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from modelscope) (67.7.2)\n",
+ "Requirement already satisfied: simplejson>=3.3.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (3.19.1)\n",
+ "Requirement already satisfied: sortedcontainers>=1.5.9 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.4.0)\n",
+ "Requirement already satisfied: tqdm>=4.64.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (4.66.1)\n",
+ "Requirement already satisfied: urllib3>=1.26 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.0.4)\n",
+ "Requirement already satisfied: yapf in /usr/local/lib/python3.10/dist-packages (from modelscope) (0.40.1)\n",
+ "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (0.3.6)\n",
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (3.3.0)\n",
+ "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (0.70.14)\n",
+ "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (2023.6.0)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (3.8.5)\n",
+ "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (0.16.4)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (23.1)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.1->modelscope) (1.16.0)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (3.2.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (3.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (2023.7.22)\n",
+ "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.22.0)\n",
+ "Requirement already satisfied: diffusers>=0.18.0 in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.20.2)\n",
+ "Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.5.0)\n",
+ "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.3.3)\n",
+ "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (2.13.0)\n",
+ "Requirement already satisfied: transformers>=4.12.0 in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (4.33.1)\n",
+ "Requirement already satisfied: crcmod>=1.7 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (1.7)\n",
+ "Requirement already satisfied: pycryptodome>=3.4.7 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (3.18.0)\n",
+ "Requirement already satisfied: aliyun-python-sdk-kms>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (2.16.1)\n",
+ "Requirement already satisfied: aliyun-python-sdk-core>=2.13.12 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (2.13.36)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->modelscope) (2023.3.post1)\n",
+ "Requirement already satisfied: importlib-metadata>=6.6.0 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (6.8.0)\n",
+ "Requirement already satisfied: platformdirs>=3.5.1 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (3.10.0)\n",
+ "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (2.0.1)\n",
+ "Requirement already satisfied: jmespath<1.0.0,>=0.9.3 in /usr/local/lib/python3.10/dist-packages (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (0.10.0)\n",
+ "Requirement already satisfied: cryptography>=2.6.0 in /usr/local/lib/python3.10/dist-packages (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (41.0.3)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.18.0->ms-swift->modelscope) (2023.6.3)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (6.0.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (4.0.3)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.9.2)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.4.0)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.3.1)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets<=2.13.0,>=2.8.0->modelscope) (4.5.0)\n",
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=6.6.0->yapf->modelscope) (3.16.2)\n",
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.12.0->ms-swift->modelscope) (0.13.3)\n",
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate->ms-swift->modelscope) (5.9.5)\n",
+ "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate->ms-swift->modelscope) (2.0.1+cu118)\n",
+ "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (1.4.0)\n",
+ "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (1.57.0)\n",
+ "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (2.17.3)\n",
+ "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (1.0.0)\n",
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (3.4.4)\n",
+ "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (3.20.3)\n",
+ "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (0.7.1)\n",
+ "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (2.3.7)\n",
+ "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (0.41.2)\n",
+ "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (1.15.1)\n",
+ "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (5.3.1)\n",
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (0.3.0)\n",
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (4.9)\n",
+ "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->ms-swift->modelscope) (1.3.1)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (1.12)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (3.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (3.1.2)\n",
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (2.0.0)\n",
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate->ms-swift->modelscope) (3.27.4.1)\n",
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate->ms-swift->modelscope) (16.0.6)\n",
+ "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->ms-swift->modelscope) (2.1.3)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (2.21)\n",
+ "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (0.5.0)\n",
+ "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->ms-swift->modelscope) (3.2.2)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate->ms-swift->modelscope) (1.3.0)\n",
+ "Looking in links: https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
+ "Requirement already satisfied: tts-autolabel in /usr/local/lib/python3.10/dist-packages (1.1.8)\n",
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.0.1+cu118)\n",
+ "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.0.2+cu118)\n",
+ "Requirement already satisfied: onnxruntime in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.15.1)\n",
+ "Requirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (0.10.1)\n",
+ "Requirement already satisfied: numpy<=1.23.1 in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.22.0)\n",
+ "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.4.1)\n",
+ "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (6.0.1)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (4.66.1)\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.10.1)\n",
+ "Requirement already satisfied: nls-fa in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (0.1)\n",
+ "Requirement already satisfied: kaldi-native-fbank in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.18.4)\n",
+ "Requirement already satisfied: typeguard<=2.13.3 in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.3.1)\n",
+ "Requirement already satisfied: ttsfrd>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (0.2.1)\n",
+ "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (3.0.0)\n",
+ "Collecting numpy<=1.23.1 (from tts-autolabel)\n",
+ " Using cached numpy-1.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.0 MB)\n",
+ "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.2.2)\n",
+ "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.3.2)\n",
+ "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (4.4.2)\n",
+ "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.56.4)\n",
+ "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.12.1)\n",
+ "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.7.0)\n",
+ "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.3.6)\n",
+ "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (4.5.0)\n",
+ "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.3)\n",
+ "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.0.5)\n",
+ "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (15.0.1)\n",
+ "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (23.5.26)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (23.1)\n",
+ "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (3.20.3)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (1.12)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.12.2)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.1.2)\n",
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (2.0.0)\n",
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->tts-autolabel) (3.27.4.1)\n",
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->tts-autolabel) (16.0.6)\n",
+ "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->tts-autolabel) (0.39.1)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->tts-autolabel) (67.7.2)\n",
+ "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->tts-autolabel) (3.10.0)\n",
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->tts-autolabel) (2.31.0)\n",
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->tts-autolabel) (3.2.0)\n",
+ "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa->tts-autolabel) (1.15.1)\n",
+ "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->onnxruntime->tts-autolabel) (10.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->tts-autolabel) (2.1.3)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime->tts-autolabel) (1.3.0)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa->tts-autolabel) (2.21)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (3.2.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (3.4)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (2.0.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (2023.7.22)\n",
+ "Installing collected packages: numpy\n",
+ " Attempting uninstall: numpy\n",
+ " Found existing installation: numpy 1.22.0\n",
+ " Uninstalling numpy-1.22.0:\n",
+ " Successfully uninstalled numpy-1.22.0\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed numpy-1.23.1\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.colab-display-data+json": {
+ "pip_warning": {
+ "packages": [
+ "numpy"
+ ]
+ }
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: typeguard==2.3.1 in /usr/local/lib/python3.10/dist-packages (2.3.1)\n",
+ "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (1.4.1)\n",
+ "Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from sox) (1.23.1)\n",
+ "Requirement already satisfied: bitstring in /usr/local/lib/python3.10/dist-packages (4.1.2)\n",
+ "Requirement already satisfied: bitarray<3.0.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from bitstring) (2.8.1)\n",
+ "Requirement already satisfied: pysptk in /usr/local/lib/python3.10/dist-packages (0.2.1)\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from pysptk) (1.10.1)\n",
+ "Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from pysptk) (4.4.2)\n",
+ "Requirement already satisfied: cython>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from pysptk) (0.29.36)\n",
+ "Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /usr/local/lib/python3.10/dist-packages (from scipy->pysptk) (1.23.1)\n",
+ "Looking in links: https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
+ "Requirement already satisfied: kantts in /usr/local/lib/python3.10/dist-packages (1.0.1)\n",
+ "Requirement already satisfied: librosa>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from kantts) (0.10.1)\n",
+ "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from kantts) (3.7.1)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from kantts) (1.23.1)\n",
+ "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from kantts) (0.56.4)\n",
+ "Requirement already satisfied: unidecode in /usr/local/lib/python3.10/dist-packages (from kantts) (1.3.6)\n",
+ "Requirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from kantts) (7.0.0)\n",
+ "Requirement already satisfied: pywavelets>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.4.1)\n",
+ "Requirement already satisfied: scikit-learn>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.2.2)\n",
+ "Requirement already satisfied: scipy>=1.7.3 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.10.1)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kantts) (4.66.1)\n",
+ "Requirement already satisfied: pysptk in /usr/local/lib/python3.10/dist-packages (from kantts) (0.2.1)\n",
+ "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (from kantts) (1.4.1)\n",
+ "Requirement already satisfied: ttsfrd in /usr/local/lib/python3.10/dist-packages (from kantts) (0.2.1)\n",
+ "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (3.0.0)\n",
+ "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.3.2)\n",
+ "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (4.4.2)\n",
+ "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.12.1)\n",
+ "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.7.0)\n",
+ "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.3.6)\n",
+ "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (4.5.0)\n",
+ "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.3)\n",
+ "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.0.5)\n",
+ "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->kantts) (0.39.1)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba->kantts) (67.7.2)\n",
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0.2->kantts) (3.2.0)\n",
+ "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->kantts) (1.10.12)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (1.1.0)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (0.11.0)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (4.42.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (1.4.5)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (23.1)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (9.4.0)\n",
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (3.1.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (2.8.2)\n",
+ "Requirement already satisfied: cython>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from pysptk->kantts) (0.29.36)\n",
+ "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.2->kantts) (3.10.0)\n",
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.2->kantts) (2.31.0)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->kantts) (1.16.0)\n",
+ "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa>=0.9.2->kantts) (1.15.1)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa>=0.9.2->kantts) (2.21)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (3.2.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (3.4)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (2.0.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (2023.7.22)\n",
+ "Requirement already satisfied: pytorch_wavelets in /usr/local/lib/python3.10/dist-packages (1.3.0)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (1.23.1)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (1.16.0)\n",
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (2.0.1+cu118)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.12.2)\n",
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (4.5.0)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (1.12)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.1.2)\n",
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (2.0.0)\n",
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch_wavelets) (3.27.4.1)\n",
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch_wavelets) (16.0.6)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->pytorch_wavelets) (2.1.3)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->pytorch_wavelets) (1.3.0)\n",
+ "Requirement already satisfied: tensorboardX in /usr/local/lib/python3.10/dist-packages (2.6.2.2)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (1.23.1)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (23.1)\n",
+ "Requirement already satisfied: protobuf>=3.20 in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (3.20.3)\n",
+ "fatal: destination path 'pytorch_wavelets' already exists and is not an empty directory.\n",
+ "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.1.0)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.11.0)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.42.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.5)\n",
+ "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.23.1)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (23.1)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (9.4.0)\n",
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.1.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (2.8.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
+ "Collecting numpy==1.22.0\n",
+ " Using cached numpy-1.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)\n",
+ "Installing collected packages: numpy\n",
+ " Attempting uninstall: numpy\n",
+ " Found existing installation: numpy 1.23.1\n",
+ " Uninstalling numpy-1.23.1:\n",
+ " Successfully uninstalled numpy-1.23.1\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "librosa 0.10.1 requires numpy!=1.22.0,!=1.22.1,!=1.22.2,>=1.20.3, but you have numpy 1.22.0 which is incompatible.\n",
+ "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\n",
+ "plotnine 0.12.3 requires numpy>=1.23.0, but you have numpy 1.22.0 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed numpy-1.22.0\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.colab-display-data+json": {
+ "pip_warning": {
+ "packages": [
+ "numpy"
+ ]
+ }
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Reading package lists... Done\n",
+ "Building dependency tree... Done\n",
+ "Reading state information... Done\n",
+ "sox is already the newest version (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1).\n",
+ "0 upgraded, 0 newly installed, 0 to remove and 16 not upgraded.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%cd pytorch_wavelets"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "A5mtXiAEUeOi",
+ "outputId": "a2fa5038-a929-4b78-f9d0-0f0b8ed7f02b"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/Personal-TTS-v2/pytorch_wavelets\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%pip install ."
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "0279o9cEUgYo",
+ "outputId": "4da47234-d8b2-448f-b494-bb7acc477994"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Processing /content/Personal-TTS-v2/pytorch_wavelets\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (1.22.0)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (1.16.0)\n",
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (2.0.1+cu118)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.12.2)\n",
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (4.5.0)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (1.12)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.1)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.1.2)\n",
+ "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (2.0.0)\n",
+ "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch-wavelets==1.3.0) (3.27.4.1)\n",
+ "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch-wavelets==1.3.0) (16.0.6)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->pytorch-wavelets==1.3.0) (2.1.3)\n",
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->pytorch-wavelets==1.3.0) (1.3.0)\n",
+ "Building wheels for collected packages: pytorch-wavelets\n",
+ " Building wheel for pytorch-wavelets (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for pytorch-wavelets: filename=pytorch_wavelets-1.3.0-py3-none-any.whl size=54852 sha256=cd4232f63138e8a8d6e09752e8296074b3653966a7e03a90ef6c44dd7a69f88a\n",
+ " Stored in directory: /tmp/pip-ephem-wheel-cache-hlkhgtfy/wheels/9a/f5/a7/b8c901126021c077bfd6fbe9f8100e26345c85e77e49f5d444\n",
+ "Successfully built pytorch-wavelets\n",
+ "Installing collected packages: pytorch-wavelets\n",
+ " Attempting uninstall: pytorch-wavelets\n",
+ " Found existing installation: pytorch-wavelets 1.3.0\n",
+ " Uninstalling pytorch-wavelets-1.3.0:\n",
+ " Successfully uninstalled pytorch-wavelets-1.3.0\n",
+ "Successfully installed pytorch-wavelets-1.3.0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%cd .."
+ ],
+ "metadata": {
+ "id": "GpzRO2X6irTm",
+ "outputId": "b2b3f8de-763f-4987-e326-cf20b1683859",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/Personal-TTS-v2\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import sox\n",
+ "%pip install gradio\n",
+ "import gradio as gr"
+ ],
+ "metadata": {
+ "id": "TOvKb2-phId5",
+ "outputId": "65526142-deb1-4600-98ae-00281ebaa66b",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: gradio in /usr/local/lib/python3.10/dist-packages (3.43.2)\n",
+ "Requirement already satisfied: aiofiles<24.0,>=22.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (23.2.1)\n",
+ "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n",
+ "Requirement already satisfied: fastapi in /usr/local/lib/python3.10/dist-packages (from gradio) (0.103.1)\n",
+ "Requirement already satisfied: ffmpy in /usr/local/lib/python3.10/dist-packages (from gradio) (0.3.1)\n",
+ "Requirement already satisfied: gradio-client==0.5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.5.0)\n",
+ "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from gradio) (0.24.1)\n",
+ "Requirement already satisfied: huggingface-hub>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.16.4)\n",
+ "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n",
+ "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.2)\n",
+ "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.3)\n",
+ "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n",
+ "Requirement already satisfied: numpy~=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.22.0)\n",
+ "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.9.7)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (23.1)\n",
+ "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.5.3)\n",
+ "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (9.4.0)\n",
+ "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.10.12)\n",
+ "Requirement already satisfied: pydub in /usr/local/lib/python3.10/dist-packages (from gradio) (0.25.1)\n",
+ "Requirement already satisfied: python-multipart in /usr/local/lib/python3.10/dist-packages (from gradio) (0.0.6)\n",
+ "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n",
+ "Requirement already satisfied: requests~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.31.0)\n",
+ "Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.10.0)\n",
+ "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.5.0)\n",
+ "Requirement already satisfied: uvicorn>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.23.2)\n",
+ "Requirement already satisfied: websockets<12.0,>=10.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (11.0.3)\n",
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.5.0->gradio) (2023.6.0)\n",
+ "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.4)\n",
+ "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (4.19.0)\n",
+ "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.12.0)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (3.12.2)\n",
+ "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (4.66.1)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.1.0)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.11.0)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.42.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.5)\n",
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2023.3.post1)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (3.2.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (3.4)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (2.0.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (2023.7.22)\n",
+ "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (8.1.7)\n",
+ "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (0.14.0)\n",
+ "Requirement already satisfied: anyio<4.0.0,>=3.7.1 in /usr/local/lib/python3.10/dist-packages (from fastapi->gradio) (3.7.1)\n",
+ "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /usr/local/lib/python3.10/dist-packages (from fastapi->gradio) (0.27.0)\n",
+ "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (0.17.3)\n",
+ "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (1.3.0)\n",
+ "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4.0.0,>=3.7.1->fastapi->gradio) (1.1.3)\n",
+ "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (23.1.0)\n",
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.7.1)\n",
+ "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.30.2)\n",
+ "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.10.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import subprocess\n",
+ "import random\n",
+ "import os\n",
+ "from pathlib import Path\n",
+ "import librosa\n",
+ "from scipy.io import wavfile\n",
+ "import numpy as np\n",
+ "import torch\n",
+ "import csv\n",
+ "import whisper\n",
+ "\n",
+ "def split_long_audio(model, filepaths, save_dir=\"data_dir\", out_sr=44100):\n",
+ "    \"\"\"Cut each recording into one WAV clip per transcribed segment.\n",
+ "\n",
+ "    Every file is transcribed with ``model.transcribe`` and the start/end\n",
+ "    times of the returned segments are used to slice the resampled,\n",
+ "    peak-normalised audio. Clips are written to ``save_dir`` as 16-bit PCM\n",
+ "    files named ``{file_idx}_{segment_idx}.wav``.\n",
+ "\n",
+ "    Args:\n",
+ "        model: Object whose ``transcribe(path, ...)`` returns a dict with a\n",
+ "            ``'segments'`` list of entries carrying ``'start'`` and ``'end'``\n",
+ "            times in seconds.\n",
+ "        filepaths: One audio path, or a list of audio paths.\n",
+ "        save_dir: Directory that receives the clips; created if missing.\n",
+ "        out_sr: Sample rate of the written clips.\n",
+ "\n",
+ "    Returns:\n",
+ "        None. The clips on disk are the result.\n",
+ "    \"\"\"\n",
+ "    if isinstance(filepaths, str):\n",
+ "        filepaths = [filepaths]\n",
+ "\n",
+ "    # The output directory is the same for every file, so create it once.\n",
+ "    save_path = Path(save_dir)\n",
+ "    save_path.mkdir(exist_ok=True, parents=True)\n",
+ "    int16_max = np.iinfo(np.int16).max\n",
+ "\n",
+ "    for file_idx, filepath in enumerate(filepaths):\n",
+ "        print(f\"Transcribing file {file_idx}: '{filepath}' to segments...\")\n",
+ "        result = model.transcribe(filepath, word_timestamps=True, task=\"transcribe\", beam_size=5, best_of=5)\n",
+ "        segments = result['segments']\n",
+ "\n",
+ "        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)\n",
+ "        # trim() also returns the [start, end] sample interval it kept; the start\n",
+ "        # is needed below to map timestamps onto the trimmed signal.\n",
+ "        wav, trim_index = librosa.effects.trim(wav, top_db=20)\n",
+ "        if wav.size == 0:\n",
+ "            # Nothing left to slice; max() on an empty array would raise.\n",
+ "            print(f\"Skipping '{filepath}': no audio samples to split.\")\n",
+ "            continue\n",
+ "        trim_offset = trim_index[0] / sr\n",
+ "\n",
+ "        peak = np.abs(wav).max()\n",
+ "        if peak > 1.0:\n",
+ "            wav = 0.98 * wav / peak\n",
+ "        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)\n",
+ "        resampled_peak = np.abs(wav2).max()\n",
+ "        if resampled_peak > 0:\n",
+ "            # Guarded so that digital silence does not turn into NaNs.\n",
+ "            wav2 /= resampled_peak\n",
+ "\n",
+ "        for i, seg in enumerate(segments):\n",
+ "            # The transcription ran on the untrimmed file, so shift its timestamps\n",
+ "            # by the amount of leading audio that trim() removed.\n",
+ "            start_time = max(seg['start'] - trim_offset, 0.0)\n",
+ "            end_time = max(seg['end'] - trim_offset, 0.0)\n",
+ "            wav_seg = wav2[int(start_time * out_sr):int(end_time * out_sr)]\n",
+ "            if wav_seg.size == 0:\n",
+ "                # Segment lies entirely outside the kept audio; skip the empty clip.\n",
+ "                continue\n",
+ "            wav_seg_name = f\"{file_idx}_{i}.wav\"\n",
+ "            out_fpath = save_path / wav_seg_name\n",
+ "            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * int16_max).astype(np.int16))\n",
+ "\n",
+ "whisper_size = \"medium\"\n",
+ "whisper_model = whisper.load_model(whisper_size)"
+ ],
+ "metadata": {
+ "id": "NdoD-ZnIaWhN"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from modelscope.tools import run_auto_label\n",
+ "\n",
+ "from modelscope.models.audio.tts import SambertHifigan\n",
+ "from modelscope.pipelines import pipeline\n",
+ "from modelscope.utils.constant import Tasks\n",
+ "\n",
+ "from modelscope.metainfo import Trainers\n",
+ "from modelscope.trainers import build_trainer\n",
+ "from modelscope.utils.audio.audio_utils import TtsTrainType\n",
+ "\n",
+ "pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'\n",
+ "\n",
+ "dataset_id = \"/content/Personal-TTS-v2/output_training_data/\"\n",
+ "pretrain_work_dir = \"/content/Personal-TTS-v2/pretrain_work_dir/\"\n",
+ "\n",
+ "\n",
+ "def auto_label(audio):\n",
+ "    \"\"\"Split ``audio`` into clips and run ModelScope auto-labeling on them.\n",
+ "\n",
+ "    Args:\n",
+ "        audio: Audio path (or list of paths) forwarded to ``split_long_audio``.\n",
+ "\n",
+ "    Returns:\n",
+ "        \"标注成功\" when both steps finish without raising; otherwise a string\n",
+ "        starting with \"标注失败\" that carries the error message.\n",
+ "    \"\"\"\n",
+ "    import traceback\n",
+ "\n",
+ "    try:\n",
+ "        split_long_audio(whisper_model, audio, \"/content/Personal-TTS-v2/test_wavs\")\n",
+ "        input_wav = \"/content/Personal-TTS-v2/test_wavs/\"\n",
+ "        output_data = \"/content/Personal-TTS-v2/output_training_data/\"\n",
+ "        run_auto_label(input_wav=input_wav, work_dir=output_data, resource_revision=\"v1.0.7\")\n",
+ "    except Exception as exc:\n",
+ "        # Failures used to be swallowed and reported as success; surface them but\n",
+ "        # still return a status string so callers expecting a str keep working.\n",
+ "        traceback.print_exc()\n",
+ "        return f\"标注失败: {exc}\"\n",
+ "    return \"标注成功\"\n",
+ "\n",
+ "\n",
+ "def train(a):\n",
+ "    \"\"\"Fine-tune the pretrained Sambert acoustic model on the labeled data.\n",
+ "\n",
+ "    Args:\n",
+ "        a: Unused; kept so existing callers that pass a value keep working.\n",
+ "\n",
+ "    Returns:\n",
+ "        \"训练完成\" when training finishes without raising; otherwise a string\n",
+ "        starting with \"训练失败\" that carries the error message.\n",
+ "    \"\"\"\n",
+ "    import traceback\n",
+ "\n",
+ "    try:\n",
+ "        train_info = {\n",
+ "            TtsTrainType.TRAIN_TYPE_SAMBERT: {  # settings for the AM (sambert) model\n",
+ "                'train_steps': 52,  # total number of training steps\n",
+ "                'save_interval_steps': 50,  # save a checkpoint every this many steps\n",
+ "                'log_interval': 10  # print a training log every this many steps\n",
+ "            }\n",
+ "        }\n",
+ "\n",
+ "        # Trainer arguments: model, dataset, working directory and train_info.\n",
+ "        kwargs = dict(\n",
+ "            model=pretrained_model_id,  # model to fine-tune\n",
+ "            model_revision=\"v1.0.6\",\n",
+ "            work_dir=pretrain_work_dir,  # temporary working directory\n",
+ "            train_dataset=dataset_id,  # location of the labeled dataset\n",
+ "            train_type=train_info  # which model types to train and their parameters\n",
+ "        )\n",
+ "\n",
+ "        trainer = build_trainer(Trainers.speech_kantts_trainer,\n",
+ "                                default_args=kwargs)\n",
+ "\n",
+ "        trainer.train()\n",
+ "\n",
+ "    except Exception as exc:\n",
+ "        # Failures used to be swallowed and reported as success; surface them but\n",
+ "        # still return a status string so callers expecting a str keep working.\n",
+ "        traceback.print_exc()\n",
+ "        return f\"训练失败: {exc}\"\n",
+ "\n",
+ "    return \"训练完成\"\n",
+ "\n",
+ "\n",
+ "import random\n",
+ "\n",
+ "def infer(text):\n",
+ "    \"\"\"Synthesize ``text`` with the fine-tuned voice and save the audio to disk.\n",
+ "\n",
+ "    A SambertHifigan model is built from the checkpoints under the training\n",
+ "    work directory, run through a text-to-speech pipeline, and the pipeline's\n",
+ "    ``output_wav`` payload is written to a new file in the current directory.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Text to synthesize.\n",
+ "\n",
+ "    Returns:\n",
+ "        Name of the written file: a random integer followed by ``myfile.wav``.\n",
+ "\n",
+ "    Raises:\n",
+ "        FileExistsError: If the chosen file name already exists (the file is\n",
+ "            opened in exclusive-create mode).\n",
+ "    \"\"\"\n",
+ "    model_dir = \"/content/Personal-TTS-v2/pretrain_work_dir\"\n",
+ "    am_dir = os.path.join(model_dir, 'tmp_am')\n",
+ "    vocoder_dir = os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan')\n",
+ "    data_dir = os.path.join(model_dir, 'data')\n",
+ "\n",
+ "    # Fine-tuned acoustic model plus the stock vocoder from the base model.\n",
+ "    custom_ckpt = {\n",
+ "        'voice_name': 'F7',\n",
+ "        'am_ckpt': os.path.join(am_dir, 'ckpt'),\n",
+ "        'am_config': os.path.join(am_dir, 'config.yaml'),\n",
+ "        'voc_ckpt': os.path.join(vocoder_dir, 'ckpt'),\n",
+ "        'voc_config': os.path.join(vocoder_dir, 'config.yaml'),\n",
+ "        'audio_config': os.path.join(data_dir, 'audio_config.yaml'),\n",
+ "        'se_file': os.path.join(data_dir, 'se', 'se.npy'),\n",
+ "    }\n",
+ "\n",
+ "    tts_model = SambertHifigan(os.path.join(model_dir, \"orig_model\"), custom_ckpt=custom_ckpt)\n",
+ "    synthesize = pipeline(task=Tasks.text_to_speech, model=tts_model)\n",
+ "    result = synthesize(input=text)\n",
+ "\n",
+ "    out_name = str(random.randint(1, 1000000000000)) + \"myfile.wav\"\n",
+ "    with open(out_name, mode='bx') as wav_file:\n",
+ "        wav_file.write(result[\"output_wav\"])\n",
+ "    return out_name"
+ ],
+ "metadata": {
+ "id": "soFE76hH-te7",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "96ff1a60-33b7-437f-f1f7-be538c5bc747"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2023-09-09 07:29:57,174 - modelscope - INFO - PyTorch version 2.0.1+cu118 Found.\n",
+ "2023-09-09 07:29:57,178 - modelscope - INFO - TensorFlow version 2.13.0 Found.\n",
+ "2023-09-09 07:29:57,179 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer\n",
+ "2023-09-09 07:29:57,214 - modelscope - INFO - Loading done! Current index file version is 1.9.0, with md5 26bab0b425de9d3ec01e3de23e7c0f4d and a total number of 921 components indexed\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "auto_label(\"nana_speech.wav\")\n",
+ "train(\"test\")\n",
+ "infer(\"欢迎使用滔滔智能的声音克隆产品\")"
+ ],
+ "metadata": {
+ "id": "xHSIK3gLjsVA",
+ "outputId": "ea68f7e7-2509-46a7-8933-0d99814c9c43",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Transcribing file 0: 'nana_speech.wav' to segments...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2023-09-09 07:30:22,206 - modelscope - INFO - Use user-specified model revision: v1.0.7\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "2023-09-09 07:30:30\n",
+ "TTS-AutoLabel version: 1.1.8\n",
+ "TTS-AutoLabel resource path: /root/.cache/modelscope/hub/damo/speech_ptts_autolabel_16k/model\n",
+ "Target sampling rate: 16000\n",
+ "Input wav dir: /content/Personal-TTS-v2/test_wavs\n",
+ "Output data dir: /content/Personal-TTS-v2/output_training_data\n",
+ "wav_preprocess start...\n",
+ "--- new folder... ---\n",
+ "--- OK ---\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 34/34 [00:00<00:00, 112.07it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[VAD] chunk recordings for training.\n",
+ "wav cut by vad start...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/34 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_6_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 3%|▎ | 1/34 [00:00<00:10, 3.11it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_9_S0000 is no need to cut\n",
+ "wav 0_14_S0000 is no need to cut\n",
+ "wav 0_3_S0000 is no need to cut\n",
+ "wav 0_17_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 15%|█▍ | 5/34 [00:00<00:02, 13.83it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_5_S0000 is no need to cut\n",
+ "wav 0_32_S0000 is no need to cut\n",
+ "wav 0_0_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 24%|██▎ | 8/34 [00:00<00:01, 17.31it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_13_S0000 is no need to cut\n",
+ "wav 0_27_S0000 is no need to cut\n",
+ "wav 0_2_S0000 is no need to cut\n",
+ "wav 0_11_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 35%|███▌ | 12/34 [00:00<00:01, 21.94it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_12_S0000 is no need to cut\n",
+ "wav 0_1_S0000 is no need to cut\n",
+ "wav 0_16_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 44%|████▍ | 15/34 [00:00<00:00, 23.32it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_23_S0000 is no need to cut\n",
+ "wav 0_8_S0000 is no need to cut\n",
+ "VAD: 0_26_S0000.wav has 17920 samples, shorter than expected samples, skipping saving in S\n",
+ "wav 0_22_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 56%|█████▌ | 19/34 [00:00<00:00, 27.22it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_33_S0000 is no need to cut\n",
+ "wav 0_29_S0000 is no need to cut\n",
+ "wav 0_28_S0000 is no need to cut\n",
+ "wav 0_20_S0000 is no need to cut\n",
+ "wav 0_10_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 71%|███████ | 24/34 [00:01<00:00, 33.14it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "VAD: 0_30_S0000.wav has 10560 samples, shorter than expected samples, skipping saving in S\n",
+ "wav 0_18_S0000 is no need to cut\n",
+ "wav 0_21_S0000 is no need to cut\n",
+ "VAD: 0_15_S0000.wav has 11200 samples, shorter than expected samples, skipping saving in S\n",
+ "wav 0_31_S0000 is no need to cut\n",
+ "VAD: 0_24_S0000.wav has 17920 samples, shorter than expected samples, skipping saving in S\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 88%|████████▊ | 30/34 [00:01<00:00, 39.36it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav 0_25_S0000 is no need to cut\n",
+ "wav 0_4_S0000 is no need to cut\n",
+ "wav 0_19_S0000 is no need to cut\n",
+ "wav 0_7_S0000 is no need to cut\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 34/34 [00:01<00:00, 27.87it/s]\n",
+ "0it [00:00, ?it/s]\n",
+ "0it [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "--- new folder... ---\n",
+ "--- OK ---\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 199.00it/s]\n",
+ "100%|██████████| 30/30 [00:01<00:00, 20.81it/s]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Text to label start...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 30/30 [00:04<00:00, 6.14it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "pre-break recording in paragraph by vad.\n",
+ "Generate phone interval by fa align.\n",
+ "prosody_dir=/content/Personal-TTS-v2/output_training_data/paragraph/prosody\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "FA processing...\n",
+ "--- New folder /content/Personal-TTS-v2/output_training_data/raw_ali... ---\n",
+ "--- OK ---\n",
+ "--- New folder /content/Personal-TTS-v2/output_training_data/raw_interval... ---\n",
+ "--- OK ---\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/30 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_14_S0000.wav, text: 长出 厚厚的 剪子 给 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 3%|▎ | 1/30 [00:00<00:03, 7.62it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_4_S0000.wav, text: 我 是 从 六岁 开始 学琴的 所以 就 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_31_S0000.wav, text: 他有 这样的 经验吧 就是 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_1_S0000.wav, text: 八岁的 时候 立志 成为 一个 像 马游勇 一样的 大提琴 演奏家在 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 13%|█▎ | 4/30 [00:00<00:01, 17.10it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_19_S0000.wav, text: 右手 要 无限的 撑开 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_0_S0000.wav, text: 在 十二岁 以前呢 我 从来 没有 想过 自己 会 演戏 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_18_S0000.wav, text: 在在 恋情的 过程 当中呢 你的左 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 23%|██▎ | 7/30 [00:00<00:01, 21.37it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_23_S0000.wav, text: 这 这个 是我 一直 在跟 这些 小朋友 讲了 讲的 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_20_S0000.wav, text: 穿 这样的 东西 所 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_10_S0000.wav, text: 像 火烧 一样 疼 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_9_S0000.wav, text: 练完 一段 时间 之后 就会 发现 手指 开始 发红 发肿像 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 37%|███▋ | 11/30 [00:00<00:00, 23.82it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_13_S0000.wav, text: 要 练到 直到 第一 层皮 退下去 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_2_S0000.wav, text: 在 非常 神圣的 音乐厅 里面 用 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_11_S0000.wav, text: 因为 恋情 这种 事情 是 不能 间断的 一恋 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 47%|████▋ | 14/30 [00:00<00:00, 22.68it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_12_S0000.wav, text: 就练 好几个 小时 练 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_17_S0000.wav, text: 左手 要比 右手 大 那么 一点 点因 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_16_S0000.wav, text: 说 两只手 是 不同 大小的 我的 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_28_S0000.wav, text: 就 很好胜的 人 我觉 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 60%|██████ | 18/30 [00:00<00:00, 25.95it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_22_S0000.wav, text: 比 学历 更重要啊 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_21_S0000.wav, text: 所以 你我 觉得 人 要有 一技之长呢 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_8_S0000.wav, text: 手指 要 不停的 在 纸板 上面 摩擦 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 70%|███████ | 21/30 [00:00<00:00, 26.00it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_3_S0000.wav, text: 音乐 感染 每一个人 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_25_S0000.wav, text: 今天 是在 学校嘛 其住 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_33_S0000.wav, text: 被 公车门 夹到 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_7_S0000.wav, text: 开始 练琴的 时候 因为 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_29_S0000.wav, text: 拿个 很简单的 例子 好了 打光 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 87%|████████▋ | 26/30 [00:01<00:00, 29.90it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_5_S0000.wav, text: 其实 我 特别 能 理解 那些 中途 想要 放弃 逃跑的 一些 学生 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_6_S0000.wav, text: 对 恋情 这件 事情 太不 容易了 刚开 \n",
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_27_S0000.wav, text: 也 相信 大家 都 看得 出来 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 97%|█████████▋| 29/30 [00:01<00:00, 24.09it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_32_S0000.wav, text: 人 很多的 时候 被 挤到 最后 然后 \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r100%|██████████| 30/30 [00:01<00:00, 23.94it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "--- There is this folder! ---\n",
+ "0_27_S0000.ali\n",
+ "0_31_S0000.ali\n",
+ "0_9_S0000.ali\n",
+ "0_32_S0000.ali\n",
+ "0_18_S0000.ali\n",
+ "0_5_S0000.ali\n",
+ "0_33_S0000.ali\n",
+ "0_8_S0000.ali\n",
+ "0_4_S0000.ali\n",
+ "0_20_S0000.ali\n",
+ "0_0_S0000.ali\n",
+ "0_7_S0000.ali\n",
+ "0_28_S0000.ali\n",
+ "0_3_S0000.ali\n",
+ "0_14_S0000.ali\n",
+ "0_6_S0000.ali\n",
+ "0_2_S0000.ali\n",
+ "0_19_S0000.ali\n",
+ "0_12_S0000.ali\n",
+ "0_23_S0000.ali\n",
+ "0_22_S0000.ali\n",
+ "0_21_S0000.ali\n",
+ "0_25_S0000.ali\n",
+ "0_11_S0000.ali\n",
+ "0_10_S0000.ali\n",
+ "0_29_S0000.ali\n",
+ "0_13_S0000.ali\n",
+ "0_16_S0000.ali\n",
+ "0_17_S0000.ali\n",
+ "0_1_S0000.ali\n",
+ "--- New folder /content/Personal-TTS-v2/output_training_data/ali... ---\n",
+ "--- OK ---\n",
+ "--- New folder /content/Personal-TTS-v2/output_training_data/coarse_interval... ---\n",
+ "--- OK ---\n",
+ "Trim silence wav with align info and modify wav files....\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 146.67it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Convert align info to interval files....\n",
+ "--- There is this folder! ---\n",
+ "0_27_S0000.ali\n",
+ "0_31_S0000.ali\n",
+ "0_9_S0000.ali\n",
+ "0_32_S0000.ali\n",
+ "0_18_S0000.ali\n",
+ "0_5_S0000.ali\n",
+ "0_33_S0000.ali\n",
+ "0_8_S0000.ali\n",
+ "0_4_S0000.ali\n",
+ "0_20_S0000.ali\n",
+ "0_0_S0000.ali\n",
+ "0_7_S0000.ali\n",
+ "0_28_S0000.ali\n",
+ "0_3_S0000.ali\n",
+ "0_14_S0000.ali\n",
+ "0_6_S0000.ali\n",
+ "0_2_S0000.ali\n",
+ "0_19_S0000.ali\n",
+ "0_12_S0000.ali\n",
+ "0_23_S0000.ali\n",
+ "0_22_S0000.ali\n",
+ "0_21_S0000.ali\n",
+ "0_25_S0000.ali\n",
+ "0_11_S0000.ali\n",
+ "0_10_S0000.ali\n",
+ "0_29_S0000.ali\n",
+ "0_13_S0000.ali\n",
+ "0_16_S0000.ali\n",
+ "0_17_S0000.ali\n",
+ "0_1_S0000.ali\n",
+ "qualification review.\n",
+ "prosody sillence detect.\n",
+ "--- Remove /content/Personal-TTS-v2/output_training_data/prosody folder! ---\n",
+ "--- New folder /content/Personal-TTS-v2/output_training_data/prosody... ---\n",
+ "--- OK ---\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 60/60 [00:00<00:00, 5585.33it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Write prosody file\n",
+ "0 \"mismatch\" sentences\n",
+ "\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trim sp started\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 30/30 [00:00<00:00, 151.59it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Trim sp finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Auto labeling info: stage 1 | develop mode 0 | gender:female | score 10.000000 | retcode 0\n",
+ "labeling report:\n",
+ "stage 1 | develop mode 0 | gender female | score 10.000000 | retcode 0\n",
+ "qulification report:\n",
+ "credit score: 10.000000\n",
+ "qualified score: 3.000000\n",
+ "normalized snr: 35.000000\n",
+ "abandon utt snr threshold: 10.000000\n",
+ "snr score ration: 0.500000\n",
+ "interval score ration: 0.500000\n",
+ "data qulificaion report:\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2023-09-09 07:30:49,683 - modelscope - INFO - Use user-specified model revision: v1.0.6\n",
+ "2023-09-09 07:30:51,894 - modelscope - INFO - Use user-specified model revision: v1.0.6\n",
+ "2023-09-09 07:30:52,717 - modelscope - INFO - Set workdir to /content/Personal-TTS-v2/pretrain_work_dir/\n",
+ "2023-09-09 07:30:52,741 - modelscope - INFO - load /content/Personal-TTS-v2/output_training_data/\n",
+ "2023-09-09 07:30:53,292 - modelscope - INFO - Use user-specified model revision: v1.0.6\n",
+ "2023-09-09 07:30:55,738 - modelscope - INFO - am_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/sambert/config.yaml voc_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/config.yaml\n",
+ "2023-09-09 07:30:55,740 - modelscope - INFO - audio_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/audio_config_se_16k.yaml\n",
+ "2023-09-09 07:30:55,740 - modelscope - INFO - am_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/sambert/ckpt/checkpoint_2400000.pth')])\n",
+ "2023-09-09 07:30:55,741 - modelscope - INFO - voc_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/ckpt/checkpoint_2400000.pth')])\n",
+ "2023-09-09 07:30:55,742 - modelscope - INFO - se_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/se.npy se_model_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/speaker_embedding/se.onnx\n",
+ "2023-09-09 07:30:55,743 - modelscope - INFO - mvn_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/mvn.npy\n",
+ "100%|██████████| 60/60 [00:00<00:00, 6437.10it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "TextScriptConvertor.process:\n",
+ "Save script to: /content/Personal-TTS-v2/pretrain_work_dir/data/Script.xml\n",
+ "TextScriptConvertor.process:\n",
+ "Save metafile to: /content/Personal-TTS-v2/pretrain_work_dir/data/raw_metafile.txt\n",
+ "[AudioProcessor] Initialize AudioProcessor.\n",
+ "[AudioProcessor] config params:\n",
+ "[AudioProcessor] wav_normalize: True\n",
+ "[AudioProcessor] trim_silence: True\n",
+ "[AudioProcessor] trim_silence_threshold_db: 60\n",
+ "[AudioProcessor] preemphasize: False\n",
+ "[AudioProcessor] sampling_rate: 16000\n",
+ "[AudioProcessor] hop_length: 200\n",
+ "[AudioProcessor] win_length: 1000\n",
+ "[AudioProcessor] n_fft: 2048\n",
+ "[AudioProcessor] n_mels: 80\n",
+ "[AudioProcessor] fmin: 0.0\n",
+ "[AudioProcessor] fmax: 8000.0\n",
+ "[AudioProcessor] phone_level_feature: True\n",
+ "[AudioProcessor] se_feature: True\n",
+ "[AudioProcessor] norm_type: mean_std\n",
+ "[AudioProcessor] max_norm: 1.0\n",
+ "[AudioProcessor] symmetric: False\n",
+ "[AudioProcessor] min_level_db: -100.0\n",
+ "[AudioProcessor] ref_level_db: 20\n",
+ "[AudioProcessor] num_workers: 16\n",
+ "[AudioProcessor] Amplitude normalization started\n",
+ "Volume statistic proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 85.39it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Average amplitude RMS : 0.13875506666666668\n",
+ "Volume statistic done.\n",
+ "Volume normalization proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 1784.76it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Volume normalization done.\n",
+ "[AudioProcessor] Amplitude normalization finished\n",
+ "[AudioProcessor] Duration generation started\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/30 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Duration align with mel is proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 30/30 [00:00<00:00, 44.11it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Duration generate finished\n",
+ "[AudioProcessor] Trim silence with interval started\n",
+ "[AudioProcessor] Start to load pcm from /content/Personal-TTS-v2/pretrain_work_dir/data/wav\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 43.33it/s]\n",
+ " 0%| | 0/30 [00:00, ?it/s]\n",
+ "100%|██████████| 30/30 [00:00<00:00, 2887.38it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Trim silence finished\n",
+ "[AudioProcessor] Melspec extraction started\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:01<00:00, 22.52it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Melspec extraction finished\n",
+ "Melspec statistic proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 27497.62it/s]\n",
+ "100%|██████████| 30/30 [00:00<00:00, 6936.17it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Melspec statistic done\n",
+ "[AudioProcessor] melspec mean and std saved to:\n",
+ "/content/Personal-TTS-v2/pretrain_work_dir/data/mel/mel_mean.txt,\n",
+ "/content/Personal-TTS-v2/pretrain_work_dir/data/mel/mel_std.txt\n",
+ "[AudioProcessor] Melspec mean std norm is proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Melspec normalization finished\n",
+ "[AudioProcessor] Normed Melspec saved to /content/Personal-TTS-v2/pretrain_work_dir/data/mel\n",
+ "[Duration Calibrating] Syllable duration 34 is not equal to the number of symbols 33, index: 0_8_S0000\n",
+ "[AudioProcessor] Pitch extraction started\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/30 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Pitch align with mel is proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 30/30 [00:01<00:00, 29.37it/s]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Pitch normalization is proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 30/30 [00:00<00:00, 23506.28it/s]\n",
+ "100%|██████████| 30/30 [00:00<00:00, 22270.64it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] f0 mean and std saved to:\n",
+ "/content/Personal-TTS-v2/pretrain_work_dir/data/f0/f0_mean.txt,\n",
+ "/content/Personal-TTS-v2/pretrain_work_dir/data/f0/f0_std.txt\n",
+ "[AudioProcessor] Pitch mean std norm is proceeding...\n",
+ "[AudioProcessor] Pitch turn to phone-level is proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 42.49it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Pitch normalization finished\n",
+ "[AudioProcessor] Normed f0 saved to /content/Personal-TTS-v2/pretrain_work_dir/data/f0\n",
+ "[AudioProcessor] Pitch extraction finished\n",
+ "[AudioProcessor] Energy extraction started\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 36.21it/s]\n",
+ "100%|██████████| 30/30 [00:00<00:00, 35757.07it/s]\n",
+ "100%|██████████| 30/30 [00:00<00:00, 23488.73it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] energy mean and std saved to:\n",
+ "/content/Personal-TTS-v2/pretrain_work_dir/data/energy/energy_mean.txt,\n",
+ "/content/Personal-TTS-v2/pretrain_work_dir/data/energy/energy_std.txt\n",
+ "[AudioProcessor] Energy mean std norm is proceeding...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 30/30 [00:00<00:00, 43.47it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[AudioProcessor] Energy normalization finished\n",
+ "[AudioProcessor] Normed Energy saved to /content/Personal-TTS-v2/pretrain_work_dir/data/energy\n",
+ "[AudioProcessor] Energy extraction finished\n",
+ "[AudioProcessor] All features extracted successfully!\n",
+ "Processing audio done.\n",
+ "[SpeakerEmbeddingProcessor] Speaker embedding extractor started\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[SpeakerEmbeddingProcessor] se model loading error!!!\n",
+ "[SpeakerEmbeddingProcessor] please update your se model to ensure that the version is greater than or equal to 1.0.5\n",
+ "[SpeakerEmbeddingProcessor] try load it as se.model\n",
+ "[SpeakerEmbeddingProcessor] Speaker embedding extracted successfully!\n",
+ "Processing speaker embedding done.\n",
+ "Processing done.\n",
+ "Voc metafile generated.\n",
+ "AM metafile generated.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "2023-09-09 07:32:01,238 - modelscope - INFO - Start training....\n",
+ "2023-09-09 07:32:01,239 - modelscope - INFO - Start SAMBERT training...\n",
+ "2023-09-09 07:32:01,240 - modelscope - INFO - TRAIN SAMBERT....\n",
+ "2023-09-09 07:32:01,254 - modelscope - INFO - TRAINING steps: 2400052\n",
+ "2023-09-09 07:32:01,261 - modelscope - INFO - audio_config = {'fmax': 8000.0, 'fmin': 0.0, 'hop_length': 200, 'max_norm': 1.0, 'min_level_db': -100.0, 'n_fft': 2048, 'n_mels': 80, 'norm_type': 'mean_std', 'num_workers': 16, 'phone_level_feature': True, 'preemphasize': False, 'ref_level_db': 20, 'sampling_rate': 16000, 'symmetric': False, 'trim_silence': True, 'trim_silence_threshold_db': 60, 'wav_normalize': True, 'win_length': 1000}\n",
+ "2023-09-09 07:32:01,262 - modelscope - INFO - Loss = {'MelReconLoss': {'enable': True, 'params': {'loss_type': 'mae'}}, 'ProsodyReconLoss': {'enable': True, 'params': {'loss_type': 'mae'}}}\n",
+ "2023-09-09 07:32:01,263 - modelscope - INFO - Model = {'KanTtsSAMBERT': {'optimizer': {'params': {'betas': [0.9, 0.98], 'eps': 1e-09, 'lr': 0.001, 'weight_decay': 0.0}, 'type': 'Adam'}, 'params': {'MAS': False, 'NSF': True, 'SE': True, 'decoder_attention_dropout': 0.1, 'decoder_dropout': 0.1, 'decoder_ffn_inner_dim': 1024, 'decoder_num_heads': 8, 'decoder_num_layers': 12, 'decoder_num_units': 128, 'decoder_prenet_units': [256, 256], 'decoder_relu_dropout': 0.1, 'dur_pred_lstm_units': 128, 'dur_pred_prenet_units': [128, 128], 'embedding_dim': 512, 'emotion_units': 32, 'encoder_attention_dropout': 0.1, 'encoder_dropout': 0.1, 'encoder_ffn_inner_dim': 1024, 'encoder_num_heads': 8, 'encoder_num_layers': 8, 'encoder_num_units': 128, 'encoder_projection_units': 32, 'encoder_relu_dropout': 0.1, 'max_len': 800, 'nsf_f0_global_maximum': 730.0, 'nsf_f0_global_minimum': 30.0, 'nsf_norm_type': 'global', 'num_mels': 82, 'outputs_per_step': 3, 'postnet_dropout': 0.1, 'postnet_ffn_inner_dim': 512, 'postnet_filter_size': 41, 'postnet_fsmn_num_layers': 4, 'postnet_lstm_units': 128, 'postnet_num_memory_units': 256, 'postnet_shift': 17, 'predictor_dropout': 0.1, 'predictor_ffn_inner_dim': 256, 'predictor_filter_size': 41, 'predictor_fsmn_num_layers': 3, 'predictor_lstm_units': 128, 'predictor_num_memory_units': 128, 'predictor_shift': 0, 'speaker_units': 192}, 'scheduler': {'params': {'warmup_steps': 4000}, 'type': 'NoamLR'}}}\n",
+ "2023-09-09 07:32:01,264 - modelscope - INFO - allow_cache = False\n",
+ "2023-09-09 07:32:01,264 - modelscope - INFO - batch_size = 32\n",
+ "2023-09-09 07:32:01,265 - modelscope - INFO - create_time = 2023-09-09 07:32:01\n",
+ "2023-09-09 07:32:01,266 - modelscope - INFO - eval_interval_steps = 10000000000000000\n",
+ "2023-09-09 07:32:01,267 - modelscope - INFO - git_revision_hash = d16755444c9baf23348213211a5ed9035458ecf0\n",
+ "2023-09-09 07:32:01,268 - modelscope - INFO - grad_norm = 1.0\n",
+ "2023-09-09 07:32:01,269 - modelscope - INFO - linguistic_unit = {'cleaners': 'english_cleaners', 'lfeat_type_list': 'sy,tone,syllable_flag,word_segment,emo_category,speaker_category', 'speaker_list': 'F7'}\n",
+ "2023-09-09 07:32:01,275 - modelscope - INFO - log_interval_steps = 50\n",
+ "2023-09-09 07:32:01,276 - modelscope - INFO - model_type = sambert\n",
+ "2023-09-09 07:32:01,278 - modelscope - INFO - num_save_intermediate_results = 4\n",
+ "2023-09-09 07:32:01,279 - modelscope - INFO - num_workers = 4\n",
+ "2023-09-09 07:32:01,280 - modelscope - INFO - pin_memory = False\n",
+ "2023-09-09 07:32:01,281 - modelscope - INFO - remove_short_samples = False\n",
+ "2023-09-09 07:32:01,284 - modelscope - INFO - save_interval_steps = 50\n",
+ "2023-09-09 07:32:01,285 - modelscope - INFO - train_max_steps = 2400052\n",
+ "2023-09-09 07:32:01,286 - modelscope - INFO - train_steps = 52\n",
+ "2023-09-09 07:32:01,287 - modelscope - INFO - log_interval = 10\n",
+ "2023-09-09 07:32:01,288 - modelscope - INFO - modelscope_version = 1.9.0\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading metafile...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 28/28 [00:00<00:00, 28883.55it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading metafile...\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 5236.33it/s]\n",
+ "2023-09-09 07:32:01,307 - modelscope - INFO - The number of training files = 28.\n",
+ "2023-09-09 07:32:01,308 - modelscope - INFO - The number of validation files = 1.\n",
+ "2023-09-09 07:32:02,161 - modelscope - INFO - Successfully resumed from /content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/sambert/ckpt/checkpoint_2400000.pth.\n",
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Checkpoint saved at step 2400000\n",
+ "(Steps: 2400000) train/TotalLoss = 0.0395.\n",
+ "(Steps: 2400000) train/mel_loss_ = 0.0091.\n",
+ "(Steps: 2400000) train/mel_loss = 0.0081.\n",
+ "(Steps: 2400000) train/dur_loss = 0.0056.\n",
+ "(Steps: 2400000) train/pitch_loss = 0.0076.\n",
+ "(Steps: 2400000) train/energy_loss = 0.0091.\n",
+ "(Steps: 2400000) train/batch_size = 0.5600.\n",
+ "(Steps: 2400000) train/x_band_width = 0.4200.\n",
+ "(Steps: 2400000) train/h_band_width = 0.4200.\n",
+ "KanTtsSAMBERT learning rate: 0.000082\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 1/1 [00:02<00:00, 2.63s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 0 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.65it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 1 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.72it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 2 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.65it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 3 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.75it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 4 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.61it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 5 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.73it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 6 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.55it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 7 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.66it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 8 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.66it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 9 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(Steps: 2400010) train/TotalLoss = 0.3747.\n",
+ "(Steps: 2400010) train/mel_loss_ = 0.0853.\n",
+ "(Steps: 2400010) train/mel_loss = 0.0710.\n",
+ "(Steps: 2400010) train/dur_loss = 0.0537.\n",
+ "(Steps: 2400010) train/pitch_loss = 0.0752.\n",
+ "(Steps: 2400010) train/energy_loss = 0.0896.\n",
+ "(Steps: 2400010) train/batch_size = 5.6000.\n",
+ "(Steps: 2400010) train/x_band_width = 4.2000.\n",
+ "(Steps: 2400010) train/h_band_width = 4.2000.\n",
+ "KanTtsSAMBERT learning rate: 0.000082\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.60it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 10 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.61it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 11 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.63it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 12 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.65it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 13 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.69it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 14 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.72it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 15 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.73it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 16 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.72it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 17 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.78it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 18 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.72it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 19 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(Steps: 2400020) train/TotalLoss = 0.3435.\n",
+ "(Steps: 2400020) train/mel_loss_ = 0.0777.\n",
+ "(Steps: 2400020) train/mel_loss = 0.0616.\n",
+ "(Steps: 2400020) train/dur_loss = 0.0474.\n",
+ "(Steps: 2400020) train/pitch_loss = 0.0709.\n",
+ "(Steps: 2400020) train/energy_loss = 0.0859.\n",
+ "(Steps: 2400020) train/batch_size = 5.6000.\n",
+ "(Steps: 2400020) train/x_band_width = 4.2000.\n",
+ "(Steps: 2400020) train/h_band_width = 4.2000.\n",
+ "KanTtsSAMBERT learning rate: 0.000082\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.66it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 20 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.76it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 21 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.76it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 22 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.80it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 23 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.73it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 24 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.74it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 25 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.71it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 26 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.77it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 27 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.78it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 28 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.77it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 29 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(Steps: 2400030) train/TotalLoss = 0.3183.\n",
+ "(Steps: 2400030) train/mel_loss_ = 0.0735.\n",
+ "(Steps: 2400030) train/mel_loss = 0.0567.\n",
+ "(Steps: 2400030) train/dur_loss = 0.0422.\n",
+ "(Steps: 2400030) train/pitch_loss = 0.0639.\n",
+ "(Steps: 2400030) train/energy_loss = 0.0821.\n",
+ "(Steps: 2400030) train/batch_size = 5.6000.\n",
+ "(Steps: 2400030) train/x_band_width = 4.2000.\n",
+ "(Steps: 2400030) train/h_band_width = 4.2000.\n",
+ "KanTtsSAMBERT learning rate: 0.000082\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.71it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 30 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.72it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 31 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.65it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 32 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.62it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 33 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.67it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 34 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.67it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 35 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.63it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 36 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.67it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 37 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.58it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 38 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.61it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 39 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(Steps: 2400040) train/TotalLoss = 0.2936.\n",
+ "(Steps: 2400040) train/mel_loss_ = 0.0707.\n",
+ "(Steps: 2400040) train/mel_loss = 0.0540.\n",
+ "(Steps: 2400040) train/dur_loss = 0.0382.\n",
+ "(Steps: 2400040) train/pitch_loss = 0.0518.\n",
+ "(Steps: 2400040) train/energy_loss = 0.0789.\n",
+ "(Steps: 2400040) train/batch_size = 5.6000.\n",
+ "(Steps: 2400040) train/x_band_width = 4.2000.\n",
+ "(Steps: 2400040) train/h_band_width = 4.2000.\n",
+ "KanTtsSAMBERT learning rate: 0.000082\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.55it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 40 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.65it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 41 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.67it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 42 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.70it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 43 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.74it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 44 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.74it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 45 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.78it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 46 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.75it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 47 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.76it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 48 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.76it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 49 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Checkpoint saved at step 2400050\n",
+ "(Steps: 2400050) train/TotalLoss = 0.2746.\n",
+ "(Steps: 2400050) train/mel_loss_ = 0.0687.\n",
+ "(Steps: 2400050) train/mel_loss = 0.0518.\n",
+ "(Steps: 2400050) train/dur_loss = 0.0354.\n",
+ "(Steps: 2400050) train/pitch_loss = 0.0426.\n",
+ "(Steps: 2400050) train/energy_loss = 0.0760.\n",
+ "(Steps: 2400050) train/batch_size = 5.6000.\n",
+ "(Steps: 2400050) train/x_band_width = 4.2000.\n",
+ "(Steps: 2400050) train/h_band_width = 4.2000.\n",
+ "KanTtsSAMBERT learning rate: 0.000082\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.10it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 50 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 51 finished\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "2023-09-09 07:32:35,661 - modelscope - INFO - SAMBERT training spent: 0.01 hours\n",
+ "\n",
+ "2023-09-09 07:32:35,662 - modelscope - INFO - skip HIFIGAN training...\n",
+ "2023-09-09 07:32:35,814 - modelscope - INFO - am_config=/content/Personal-TTS-v2/pretrain_work_dir/tmp_am/config.yaml voc_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/config.yaml\n",
+ "2023-09-09 07:32:35,815 - modelscope - INFO - audio_config=/content/Personal-TTS-v2/pretrain_work_dir/data/audio_config.yaml\n",
+ "2023-09-09 07:32:35,816 - modelscope - INFO - am_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/tmp_am/ckpt/checkpoint_2400000.pth'), (2400050, '/content/Personal-TTS-v2/pretrain_work_dir/tmp_am/ckpt/checkpoint_2400050.pth')])\n",
+ "2023-09-09 07:32:35,818 - modelscope - INFO - voc_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/ckpt/checkpoint_2400000.pth')])\n",
+ "2023-09-09 07:32:35,819 - modelscope - INFO - se_path=/content/Personal-TTS-v2/pretrain_work_dir/data/se/se.npy se_model_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/se.onnx\n",
+ "2023-09-09 07:32:35,820 - modelscope - INFO - mvn_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/mvn.npy\n",
+ "2023-09-09 07:32:40,074 - modelscope - WARNING - No preprocessor field found in cfg.\n",
+ "2023-09-09 07:32:40,076 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n",
+ "2023-09-09 07:32:40,077 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/content/Personal-TTS-v2/pretrain_work_dir/orig_model'}. trying to build by task and model information.\n",
+ "2023-09-09 07:32:40,078 - modelscope - WARNING - No preprocessor key ('sambert-hifigan', 'text-to-speech') found in PREPROCESSOR_MAP, skip building preprocessor.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Removing weight norm...\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'69792480141myfile.wav'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "app = gr.Blocks()\n",
+ "\n",
+ "with app:\n",
+ " gr.Markdown(\"#