Skip to content

Commit

Permalink
[DONE] Ptitloup/improve subtitle accessibility (#1018)
Browse files Browse the repository at this point in the history
* create improveCaptionsAccessibility and call it before saving webvtt file to improve accessibility of subtitle

* remove trailing whitespace

* add JS function to let users choose a language for video transcription and hide the button if no language is chosen

* add the specified language to the video before launching the transcript

* add some pydoc in the new functions - use the same function to convert to timestamps

* add pydoc in good format
  • Loading branch information
ptitloup authored Jan 16, 2024
1 parent 519aaf9 commit d79ba51
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 22 deletions.
22 changes: 20 additions & 2 deletions pod/video/templates/videos/video_edit.html
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ <h2 class="accordion-header">
{% if field.name == "transcript" %}
{% if form.instance.id and form.instance.get_encoding_step == "" or form.instance.encoding_in_progress %}
{% else %}
{% if form.instance.slug != '' and form.instance.get_encoding_step == '0 : end of encoding' %}
<a class="btn btn-primary btn-sm ps-2 pe-2 m-1" type="button" href="{% url 'video:video_transcript' form.instance.slug %}">
{% if form.instance.slug != '' and form.instance.get_encoding_step|slice:":1" == '0' or 'no stt model' in form.instance.get_encoding_step %}
<a class="btn btn-primary btn-sm ps-2 pe-2 m-1" type="button" href="{% url 'video:video_transcript' form.instance.slug %}" id="restart_transcript">
<i class="bi bi-translate" aria-hidden="true"></i>
{% trans "Restart transcription" %}
</a>
Expand Down Expand Up @@ -296,6 +296,24 @@ <h2 class="h4 card-header card-title pod-card__title ps-2">{% trans "Help for fo
passwordInput.addEventListener("click", function() {
passwordInput.select();
});
const select_transcript = document.getElementById("id_transcript");
const transcript_button = document.getElementById("restart_transcript");
if(transcript_button) {
let transcript_button_href = transcript_button.href.split('?')[0];
function change_transcript() {
if(select_transcript.value == "") {
transcript_button.href = transcript_button_href;
transcript_button.classList.add("invisible");
} else {
transcript_button.classList.remove("invisible");
transcript_button.href = transcript_button_href + "?lang=" + select_transcript.value;
}
}
select_transcript.addEventListener("change", function(event){
change_transcript();
});
change_transcript();
}
</script>

{% if form.instance.encoding_in_progress and request.user.owner.accepts_notifications is not False %}
Expand Down
37 changes: 30 additions & 7 deletions pod/video/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,16 @@ def get_owners_has_instances(owners):


def owner_is_searchable(user):
"""Return if user is searchable according to HIDE_USER_FILTER setting and authenticated user"""
"""
Return if user is searchable according to HIDE_USER_FILTER setting
and authenticated user.
Args:
user (:class:`django.contrib.auth.models.User`): The user object
Returns:
bool: True if HIDE_USER_FILTER is False and user is authenticated, False otherwise
"""
return not HIDE_USER_FILTER and user.is_authenticated


Expand Down Expand Up @@ -1210,12 +1219,26 @@ def video_transcript(request, slug=None):
)
return redirect(reverse("video:video_edit", args=(video.slug,)))

if video.get_video_mp3():
transcript_video = getattr(transcript, TRANSCRIPT_VIDEO)
transcript_video(video.id)
messages.add_message(
request, messages.INFO, _("The video transcript has been restarted.")
)
if video.get_video_mp3() :
available_transcript_lang = [lang[0] for lang in get_transcription_choices()]
if (
request.GET.get("lang", "") != ""
and request.GET["lang"] in available_transcript_lang
):
if video.transcript != request.GET["lang"]:
video.transcript = request.GET["lang"]
video.save()
transcript_video = getattr(transcript, TRANSCRIPT_VIDEO)
transcript_video(video.id)
messages.add_message(
request, messages.INFO, _("The video transcript has been restarted.")
)
else:
messages.add_message(
request,
messages.ERROR,
_("An available transcription language must be specified."),
)

return redirect(reverse("video:video_edit", args=(video.slug,)))

Expand Down
111 changes: 111 additions & 0 deletions pod/video_encode_transcript/transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.core.files import File
from pod.completion.models import Track
from pod.main.tasks import task_start_transcript
from webvtt import Caption

from .utils import (
send_email,
Expand All @@ -19,6 +20,7 @@
or importlib.util.find_spec("whisper") is not None
):
from .transcript_model import start_transcripting
from .transcript_model import sec_to_timestamp

import os
import time
Expand Down Expand Up @@ -149,6 +151,7 @@ def saveVTT(video, webvtt):
temp_vtt_file = NamedTemporaryFile(suffix=".vtt")
webvtt.save(temp_vtt_file.name)
if webvtt.captions:
improveCaptionsAccessibility(webvtt)
msg += "\nstore vtt file in bdd with CustomFileModel model file field"
if __FILEPICKER__:
videodir, created = UserFolder.objects.get_or_create(
Expand Down Expand Up @@ -186,3 +189,111 @@ def saveVTT(video, webvtt):
else:
msg += "\nERROR SUBTITLES Output size is 0"
return msg


def improveCaptionsAccessibility(webvtt):
"""
Parse the vtt file in argument to render the caption conform to accessibility.
- see `https://github.com/knarf18/Bonnes-pratiques-du-sous-titrage/blob/master/Liste%20de%20bonnes%20pratiques.md` # noqa: E501
- 40 car maximum per ligne (CPL)
- 2 lines max by caption
Args:
webvtt (:class:`webvtt.WebVTT`): the webvtt file content
"""
new_captions = []
for caption in webvtt.captions:
sent = split_string(caption.text, 40, sep=" ")
if len(sent) > 2:
num_captions = int(len(sent) / 2)
if len(sent) % 2 :
num_captions += 1
dur = caption.end_in_seconds - caption.start_in_seconds
for x in range(num_captions):
new_cap = Caption()
new_cap.start = sec_to_timestamp(
caption.start_in_seconds + x * dur / num_captions
)
new_cap.end = sec_to_timestamp(
caption.start_in_seconds + (x + 1) * dur / num_captions
)
new_cap.text = get_cap_text(sent, x)
new_captions.append(new_cap)
else:
new_cap = Caption()
new_cap.start = caption.start
new_cap.end = caption.end
new_cap.text = "\n".join(sent)
new_captions.append(new_cap)
# remove all old captions
while len(webvtt.captions) > 0:
del webvtt.captions[0]
# add the new one
for cap in new_captions:
webvtt.captions.append(cap)
webvtt.save()


def get_cap_text(sent, x):
"""
Get the text in the sent array at the position gived in arg.
Args:
sent (list): The list of text
x (int): The position to extract
Returns:
str: The extracted text
"""
new_cap_text = sent[x * 2]
try:
new_cap_text += "\n" + sent[x * 2 + 1]
except IndexError:
pass
return new_cap_text


def pad(line, limit):
"""
Add some space at the end of line to specified limit.
Args:
line (str): A line of text
limit (int): The size of line
Returns:
str: the line with space at the end
"""
return line + " " * (limit - len(line))


def split_string(text, limit, sep=" "):
"""
Split text by word for specified limit.
Args:
text (str): the text of the caption
limit (int): size of line
sep (str): default " "
Returns:
array: list of words in the text
"""
words = text.split()
if max(map(len, words)) > limit:
raise ValueError("limit is too small")
res = []
part = words[0]
others = words[1:]
for word in others:
if len(sep) + len(word) > limit - len(part):
res.append(part)
part = word
else:
part += sep + word
if part:
res.append(part)
# add space to the end of line
result = [pad(line, limit) for line in res]
return result
27 changes: 14 additions & 13 deletions pod/video_encode_transcript/transcript_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ def words_to_vtt(
change_previous_end_caption(webvtt, start_caption)

caption = Caption(
format_time_caption(start_caption),
format_time_caption(stop_caption),
sec_to_timestamp(start_caption),
sec_to_timestamp(stop_caption),
" ".join(text_caption),
)

Expand All @@ -274,8 +274,8 @@ def words_to_vtt(
# on ajoute ici la dernière phrase de la vidéo
stop_caption = start_trim + words[-1][start_key] + last_word_duration
caption = Caption(
format_time_caption(start_caption),
format_time_caption(stop_caption),
sec_to_timestamp(start_caption),
sec_to_timestamp(stop_caption),
" ".join(text_caption),
)
webvtt.captions.append(caption)
Expand Down Expand Up @@ -312,8 +312,8 @@ def main_vosk_transcript(norm_mp3_file, duration, transript_model):
start_caption = words[0]["start"]
stop_caption = words[-1]["end"]
caption = Caption(
format_time_caption(start_caption),
format_time_caption(stop_caption),
sec_to_timestamp(start_caption),
sec_to_timestamp(stop_caption),
text,
)
webvtt.captions.append(caption)
Expand Down Expand Up @@ -422,8 +422,8 @@ def main_whisper_transcript(norm_mp3_file, lang):

for segment in transcription["segments"]:
caption = Caption(
format_time_caption(segment["start"]),
format_time_caption(segment["end"]),
sec_to_timestamp(segment["start"]),
sec_to_timestamp(segment["end"]),
segment["text"],
)
webvtt.captions.append(caption)
Expand All @@ -444,14 +444,15 @@ def change_previous_end_caption(webvtt, start_caption):
microseconds=prev_end.microsecond,
).total_seconds()
if td_prev_end > start_caption:
webvtt.captions[-1].end = format_time_caption(start_caption)
webvtt.captions[-1].end = sec_to_timestamp(start_caption)


def format_time_caption(time_caption):
def sec_to_timestamp(total_seconds):
"""Format time for webvtt caption."""
return (
dt.datetime.utcfromtimestamp(0) + timedelta(seconds=float(time_caption))
).strftime("%H:%M:%S.%f")[:-3]
hours = int(total_seconds / 3600)
minutes = int(total_seconds / 60 - hours * 60)
seconds = total_seconds - hours * 3600 - minutes * 60
return '{:02d}:{:02d}:{:06.3f}'.format(hours, minutes, seconds)


def get_text_caption(text_caption, last_word_added):
Expand Down

0 comments on commit d79ba51

Please sign in to comment.