Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinWang676 authored Jun 11, 2023
1 parent 1bc93d4 commit f7ba918
Show file tree
Hide file tree
Showing 31 changed files with 1,748 additions and 0 deletions.
2 changes: 2 additions & 0 deletions bark/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .api import generate_audio, text_to_semantic, semantic_to_waveform, save_as_prompt
from .generation import SAMPLE_RATE, preload_models
158 changes: 158 additions & 0 deletions bark/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from typing import Dict, Optional, Union

import numpy as np

from .generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic


def generate_with_settings(
    text_prompt,
    semantic_temp=0.6,
    eos_p=0.2,
    coarse_temp=0.7,
    fine_temp=0.5,
    voice_name=None,
    output_full=False,
):
    """Run the text -> semantic -> coarse -> fine -> audio chain with per-stage settings.

    Args:
        text_prompt: text handed to `generate_text_semantic`
        semantic_temp: `temp` passed to the semantic stage
        eos_p: forwarded to the semantic stage as `min_eos_p`
        coarse_temp: `temp` passed to `generate_coarse`
        fine_temp: `temp` passed to `generate_fine`
        voice_name: used as `history_prompt` for all three generation stages
        output_full: when true, also return the intermediate token arrays
    Returns:
        the result of `codec_decode` on the fine tokens; when `output_full`
        is true, a `(full_generation, audio)` tuple where `full_generation`
        is a dict with keys 'semantic_prompt', 'coarse_prompt', 'fine_prompt'
    """
    # Stage 1: text -> semantic tokens.
    semantic_tokens = generate_text_semantic(
        text_prompt,
        history_prompt=voice_name,
        temp=semantic_temp,
        min_eos_p=eos_p,
        use_kv_caching=True,
    )

    # Stage 2: semantic -> coarse tokens.
    coarse_tokens = generate_coarse(
        semantic_tokens,
        history_prompt=voice_name,
        temp=coarse_temp,
        use_kv_caching=True,
    )

    # Stage 3: coarse -> fine tokens.
    fine_tokens = generate_fine(
        coarse_tokens,
        history_prompt=voice_name,
        temp=fine_temp,
    )

    audio = codec_decode(fine_tokens)
    if not output_full:
        return audio

    intermediates = {
        'semantic_prompt': semantic_tokens,
        'coarse_prompt': coarse_tokens,
        'fine_prompt': fine_tokens,
    }
    return intermediates, audio


def text_to_semantic(
    text: str,
    history_prompt: Optional[Union[Dict, str]] = None,
    temp: float = 0.7,
    silent: bool = False,
):
    """Turn text into a semantic token array.

    Thin wrapper around `generate_text_semantic` that always enables
    `use_kv_caching`.

    Args:
        text: text to be turned into audio
        history_prompt: history choice for audio cloning
        temp: generation temperature (1.0 more diverse, 0.0 more conservative)
        silent: disable progress bar
    Returns:
        numpy semantic array to be fed into `semantic_to_waveform`
    """
    return generate_text_semantic(
        text,
        history_prompt=history_prompt,
        temp=temp,
        silent=silent,
        use_kv_caching=True,
    )


def semantic_to_waveform(
    semantic_tokens: np.ndarray,
    history_prompt: Optional[Union[Dict, str]] = None,
    temp: float = 0.7,
    silent: bool = False,
    output_full: bool = False,
    fine_temp: float = 0.5,
):
    """Generate audio array from semantic input.

    Args:
        semantic_tokens: semantic token output from `text_to_semantic`
        history_prompt: history choice for audio cloning
        temp: generation temperature for the coarse stage
            (1.0 more diverse, 0.0 more conservative)
        silent: disable progress bar
        output_full: return full generation to be used as a history prompt
        fine_temp: generation temperature passed to `generate_fine`; the
            default of 0.5 matches the value that used to be hard-coded here
    Returns:
        numpy audio array at sample frequency 24khz; when `output_full` is
        true, a `(full_generation, audio_arr)` tuple where `full_generation`
        is a dict with keys "semantic_prompt", "coarse_prompt", "fine_prompt"
    """
    coarse_tokens = generate_coarse(
        semantic_tokens,
        history_prompt=history_prompt,
        temp=temp,
        silent=silent,
        use_kv_caching=True,
    )
    # The fine stage has its own temperature, independent of `temp`.
    fine_tokens = generate_fine(
        coarse_tokens,
        history_prompt=history_prompt,
        temp=fine_temp,
    )
    audio_arr = codec_decode(fine_tokens)
    if output_full:
        full_generation = {
            "semantic_prompt": semantic_tokens,
            "coarse_prompt": coarse_tokens,
            "fine_prompt": fine_tokens,
        }
        return full_generation, audio_arr
    return audio_arr


def save_as_prompt(filepath, full_generation):
    """Save a full generation dict as an ``.npz`` history prompt.

    Args:
        filepath: destination path; must end with ".npz"
        full_generation: dict containing at least the keys "semantic_prompt",
            "coarse_prompt" and "fine_prompt"; every entry of the dict is
            written to the archive with `np.savez`
    Raises:
        AssertionError: if `filepath` does not end with ".npz",
            `full_generation` is not a dict, or a required key is missing
    """
    # The checks are raised explicitly instead of using `assert` statements so
    # that they still run under `python -O`; AssertionError is kept so callers
    # that already catch it keep working.
    if not filepath.endswith(".npz"):
        raise AssertionError(
            f"filepath must end with '.npz', got {filepath!r}"
        )
    if not isinstance(full_generation, dict):
        raise AssertionError(
            "full_generation must be a dict, got "
            f"{type(full_generation).__name__}"
        )
    required_keys = ("semantic_prompt", "coarse_prompt", "fine_prompt")
    missing = [key for key in required_keys if key not in full_generation]
    if missing:
        raise AssertionError(
            f"full_generation is missing required keys: {missing}"
        )
    np.savez(filepath, **full_generation)


def generate_audio(
    text: str,
    history_prompt: Optional[Union[Dict, str]] = None,
    text_temp: float = 0.7,
    waveform_temp: float = 0.7,
    silent: bool = False,
    output_full: bool = False,
):
    """Produce an audio array from text in two steps.

    The text is first converted with `text_to_semantic`, and the resulting
    tokens are then rendered with `semantic_to_waveform`.

    Args:
        text: text to be turned into audio
        history_prompt: history choice for audio cloning
        text_temp: temperature for the text -> semantic step
            (1.0 more diverse, 0.0 more conservative)
        waveform_temp: temperature for the semantic -> waveform step
            (1.0 more diverse, 0.0 more conservative)
        silent: disable progress bar
        output_full: return full generation to be used as a history prompt
    Returns:
        numpy audio array at sample frequency 24khz; when `output_full` is
        true, a `(full_generation, audio_arr)` tuple instead
    """
    tokens = text_to_semantic(
        text,
        history_prompt=history_prompt,
        temp=text_temp,
        silent=silent,
    )
    result = semantic_to_waveform(
        tokens,
        history_prompt=history_prompt,
        temp=waveform_temp,
        silent=silent,
        output_full=output_full,
    )
    if not output_full:
        # Plain mode: the waveform call already returned just the audio.
        return result

    # Full mode: unpack the pair and hand both parts back as a tuple.
    prompt_data, waveform = result
    return prompt_data, waveform
Binary file added bark/assets/prompts/announcer.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_0.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_1.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_2.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_3.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_4.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_5.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_6.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_7.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_8.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/en_speaker_9.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_0.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_1.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_2.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_3.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_4.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_5.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_6.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_7.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_8.npz
Binary file not shown.
Binary file added bark/assets/prompts/v2/zh_speaker_9.npz
Binary file not shown.
Loading

0 comments on commit f7ba918

Please sign in to comment.