Skip to content

Commit

Permalink
Fix SSML markup being added to non-SSML text
Browse files Browse the repository at this point in the history
Note: this is perhaps not the best way to indicate the current text
format.
  • Loading branch information
eliasbe committed Dec 6, 2024
1 parent df1991b commit 13c84a0
Showing 1 changed file with 24 additions and 11 deletions.
35 changes: 24 additions & 11 deletions src/icespeak/transcribe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
PunctuationTuple,
)

from icespeak.settings import TRACE
from icespeak.settings import SETTINGS, TRACE, TextFormats

from .num import (
ROMAN_NUMERALS,
Expand Down Expand Up @@ -874,6 +874,7 @@ def spell(
cls,
txt: str,
*,
text_format: TextFormats = SETTINGS.DEFAULT_TEXT_FORMAT,
pause_length: str | None = None,
literal: bool = False,
) -> str:
Expand All @@ -890,15 +891,18 @@ def spell(
# Non-literal spelling
f = lambda c: cls._CHAR_PRONUNCIATION.get(c.lower(), c) if not c.isspace() else ""
t = tuple(map(f, txt))
return (
cls.vbreak(time="10ms")
+ cls.vbreak(time=pause_length or "20ms").join(t)
+ cls.vbreak(time="20ms" if len(t) > 1 else "10ms")
)
if text_format == "ssml":
return (
cls.vbreak(time="10ms")
+ cls.vbreak(time=pause_length or "20ms").join(t)
+ cls.vbreak(time="20ms" if len(t) > 1 else "10ms")
)
else:
return " ".join(t)

@classmethod
@_transcribe_method
def abbrev(cls, txt: str) -> str:
def abbrev(cls, txt: str, *, text_format: TextFormats = SETTINGS.DEFAULT_TEXT_FORMAT) -> str:
"""Expand an abbreviation."""
meanings = tuple(
filter(
Expand All @@ -908,7 +912,10 @@ def abbrev(cls, txt: str) -> str:
)
if meanings:
# Abbreviation has at least one known meaning, expand it
return cls.vbreak(time="10ms") + meanings[0].stofn + cls.vbreak(time="50ms")
if text_format == "ssml":
return cls.vbreak(time="10ms") + meanings[0].stofn + cls.vbreak(time="50ms")
else:
return meanings[0].stofn

# Fallbacks:
# - Spell out, if any letter is uppercase (e.g. "MSc")
Expand Down Expand Up @@ -1111,7 +1118,9 @@ def entity(cls, txt: str) -> str:
@_transcribe_method
@_bool_args("full_text")
@lru_cache(maxsize=50) # Caching, as this method could be slow
def parser_transcribe(cls, txt: str, *, full_text: bool = False) -> str:
def parser_transcribe(
cls, txt: str, *, full_text: bool = False, text_format: TextFormats = SETTINGS.DEFAULT_TEXT_FORMAT
) -> str:
"""
Slow transcription of Icelandic text for TTS.
Utilizes the parser from the GreynirPackage library.
Expand Down Expand Up @@ -1175,7 +1184,11 @@ def _percent(tok: Tok, term: SimpleTree | None) -> str:

def _numwletter(tok: Tok, term: SimpleTree | None) -> str:
num = "".join(filter(lambda c: c.isdecimal(), tok.txt))
return cls.number(num, case="nf", gender="hk") + " " + cls.spell(tok.txt[len(num) + 1 :])
return (
cls.number(num, case="nf", gender="hk")
+ " "
+ cls.spell(tok.txt[len(num) + 1 :], text_format=text_format)
)

# Map certain terminals directly to transcription functions
handler_map: Mapping[int, Callable[[Tok, SimpleTree | None], str]] = {
Expand Down Expand Up @@ -1235,7 +1248,7 @@ def _numwletter(tok: Tok, term: SimpleTree | None) -> str:
or any(not _ABBREV_RE.match(m.stofn) for m in tok.meanings)
):
# Probably an abbreviation such as "t.d." or "MSc"
s_parts.append(cls.abbrev(txt))
s_parts.append(cls.abbrev(txt, text_format=text_format))

# Check whether this is a hyphen denoting a range
elif (
Expand Down

0 comments on commit 13c84a0

Please sign in to comment.