Skip to content

Commit

Permalink
[DONE] Ptitloup/improve subtitle accessibility (#1018)
Browse files Browse the repository at this point in the history
* create improveCaptionsAccessibility and call it before saving webvtt file to improve accessibility of subtitle

* remove trailing whitespace

* add JS function to let users choose a language for video transcription and hide the button if no language is chosen

* add the specified language to the video before launching the transcript

* add some pydoc in the new functions - use the same function to convert to timestamps

* add pydoc in good format
  • Loading branch information
ptitloup authored Jan 16, 2024
1 parent 519aaf9 commit d79ba51
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 22 deletions.
22 changes: 20 additions & 2 deletions pod/video/templates/videos/video_edit.html
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ <h2 class="accordion-header">
{% if field.name == "transcript" %}
{% if form.instance.id and form.instance.get_encoding_step == "" or form.instance.encoding_in_progress %}
{% else %}
{% if form.instance.slug != '' and form.instance.get_encoding_step == '0 : end of encoding' %}
<a class="btn btn-primary btn-sm ps-2 pe-2 m-1" type="button" href="{% url 'video:video_transcript' form.instance.slug %}">
{% if form.instance.slug != '' and form.instance.get_encoding_step|slice:":1" == '0' or 'no stt model' in form.instance.get_encoding_step %}
<a class="btn btn-primary btn-sm ps-2 pe-2 m-1" type="button" href="{% url 'video:video_transcript' form.instance.slug %}" id="restart_transcript">
<i class="bi bi-translate" aria-hidden="true"></i>
{% trans "Restart transcription" %}
</a>
Expand Down Expand Up @@ -296,6 +296,24 @@ <h2 class="h4 card-header card-title pod-card__title ps-2">{% trans "Help for fo
passwordInput.addEventListener("click", function() {
passwordInput.select();
});
const select_transcript = document.getElementById("id_transcript");
const transcript_button = document.getElementById("restart_transcript");
if(transcript_button) {
let transcript_button_href = transcript_button.href.split('?')[0];
function change_transcript() {
if(select_transcript.value == "") {
transcript_button.href = transcript_button_href;
transcript_button.classList.add("invisible");
} else {
transcript_button.classList.remove("invisible");
transcript_button.href = transcript_button_href + "?lang=" + select_transcript.value;
}
}
select_transcript.addEventListener("change", function(event){
change_transcript();
});
change_transcript();
}
</script>

{% if form.instance.encoding_in_progress and request.user.owner.accepts_notifications is not False %}
Expand Down
37 changes: 30 additions & 7 deletions pod/video/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,16 @@ def get_owners_has_instances(owners):


def owner_is_searchable(user):
"""Return if user is searchable according to HIDE_USER_FILTER setting and authenticated user"""
"""
Return if user is searchable according to HIDE_USER_FILTER setting
and authenticated user.
Args:
user (:class:`django.contrib.auth.models.User`): The user object
Returns:
bool: True if HIDE_USER_FILTER is False and user is authenticated, False otherwise
"""
return not HIDE_USER_FILTER and user.is_authenticated


Expand Down Expand Up @@ -1210,12 +1219,26 @@ def video_transcript(request, slug=None):
)
return redirect(reverse("video:video_edit", args=(video.slug,)))

if video.get_video_mp3():
transcript_video = getattr(transcript, TRANSCRIPT_VIDEO)
transcript_video(video.id)
messages.add_message(
request, messages.INFO, _("The video transcript has been restarted.")
)
if video.get_video_mp3() :
available_transcript_lang = [lang[0] for lang in get_transcription_choices()]
if (
request.GET.get("lang", "") != ""
and request.GET["lang"] in available_transcript_lang
):
if video.transcript != request.GET["lang"]:
video.transcript = request.GET["lang"]
video.save()
transcript_video = getattr(transcript, TRANSCRIPT_VIDEO)
transcript_video(video.id)
messages.add_message(
request, messages.INFO, _("The video transcript has been restarted.")
)
else:
messages.add_message(
request,
messages.ERROR,
_("An available transcription language must be specified."),
)

return redirect(reverse("video:video_edit", args=(video.slug,)))

Expand Down
111 changes: 111 additions & 0 deletions pod/video_encode_transcript/transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from django.core.files import File
from pod.completion.models import Track
from pod.main.tasks import task_start_transcript
from webvtt import Caption

from .utils import (
send_email,
Expand All @@ -19,6 +20,7 @@
or importlib.util.find_spec("whisper") is not None
):
from .transcript_model import start_transcripting
from .transcript_model import sec_to_timestamp

import os
import time
Expand Down Expand Up @@ -149,6 +151,7 @@ def saveVTT(video, webvtt):
temp_vtt_file = NamedTemporaryFile(suffix=".vtt")
webvtt.save(temp_vtt_file.name)
if webvtt.captions:
improveCaptionsAccessibility(webvtt)
msg += "\nstore vtt file in bdd with CustomFileModel model file field"
if __FILEPICKER__:
videodir, created = UserFolder.objects.get_or_create(
Expand Down Expand Up @@ -186,3 +189,111 @@ def saveVTT(video, webvtt):
else:
msg += "\nERROR SUBTITLES Output size is 0"
return msg


def improveCaptionsAccessibility(webvtt):
"""
Parse the vtt file in argument to render the caption conform to accessibility.
- see `https://github.com/knarf18/Bonnes-pratiques-du-sous-titrage/blob/master/Liste%20de%20bonnes%20pratiques.md` # noqa: E501
- 40 car maximum per ligne (CPL)
- 2 lines max by caption
Args:
webvtt (:class:`webvtt.WebVTT`): the webvtt file content
"""
new_captions = []
for caption in webvtt.captions:
sent = split_string(caption.text, 40, sep=" ")
if len(sent) > 2:
num_captions = int(len(sent) / 2)
if len(sent) % 2 :
num_captions += 1
dur = caption.end_in_seconds - caption.start_in_seconds
for x in range(num_captions):
new_cap = Caption()
new_cap.start = sec_to_timestamp(
caption.start_in_seconds + x * dur / num_captions
)
new_cap.end = sec_to_timestamp(
caption.start_in_seconds + (x + 1) * dur / num_captions
)
new_cap.text = get_cap_text(sent, x)
new_captions.append(new_cap)
else:
new_cap = Caption()
new_cap.start = caption.start
new_cap.end = caption.end
new_cap.text = "\n".join(sent)
new_captions.append(new_cap)
# remove all old captions
while len(webvtt.captions) > 0:
del webvtt.captions[0]
# add the new one
for cap in new_captions:
webvtt.captions.append(cap)
webvtt.save()


def get_cap_text(sent, x):
"""
Get the text in the sent array at the position gived in arg.
Args:
sent (list): The list of text
x (int): The position to extract
Returns:
str: The extracted text
"""
new_cap_text = sent[x * 2]
try:
new_cap_text += "\n" + sent[x * 2 + 1]
except IndexError:
pass
return new_cap_text


def pad(line, limit):
"""
Add some space at the end of line to specified limit.
Args:
line (str): A line of text
limit (int): The size of line
Returns:
str: the line with space at the end
"""
return line + " " * (limit - len(line))


def split_string(text, limit, sep=" "):
"""
Split text by word for specified limit.
Args:
text (str): the text of the caption
limit (int): size of line
sep (str): default " "
Returns:
array: list of words in the text
"""
words = text.split()
if max(map(len, words)) > limit:
raise ValueError("limit is too small")
res = []
part = words[0]
others = words[1:]
for word in others:
if len(sep) + len(word) > limit - len(part):
res.append(part)
part = word
else:
part += sep + word
if part:
res.append(part)
# add space to the end of line
result = [pad(line, limit) for line in res]
return result
27 changes: 14 additions & 13 deletions pod/video_encode_transcript/transcript_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ def words_to_vtt(
change_previous_end_caption(webvtt, start_caption)

caption = Caption(
format_time_caption(start_caption),
format_time_caption(stop_caption),
sec_to_timestamp(start_caption),
sec_to_timestamp(stop_caption),
" ".join(text_caption),
)

Expand All @@ -274,8 +274,8 @@ def words_to_vtt(
# on ajoute ici la dernière phrase de la vidéo
stop_caption = start_trim + words[-1][start_key] + last_word_duration
caption = Caption(
format_time_caption(start_caption),
format_time_caption(stop_caption),
sec_to_timestamp(start_caption),
sec_to_timestamp(stop_caption),
" ".join(text_caption),
)
webvtt.captions.append(caption)
Expand Down Expand Up @@ -312,8 +312,8 @@ def main_vosk_transcript(norm_mp3_file, duration, transript_model):
start_caption = words[0]["start"]
stop_caption = words[-1]["end"]
caption = Caption(
format_time_caption(start_caption),
format_time_caption(stop_caption),
sec_to_timestamp(start_caption),
sec_to_timestamp(stop_caption),
text,
)
webvtt.captions.append(caption)
Expand Down Expand Up @@ -422,8 +422,8 @@ def main_whisper_transcript(norm_mp3_file, lang):

for segment in transcription["segments"]:
caption = Caption(
format_time_caption(segment["start"]),
format_time_caption(segment["end"]),
sec_to_timestamp(segment["start"]),
sec_to_timestamp(segment["end"]),
segment["text"],
)
webvtt.captions.append(caption)
Expand All @@ -444,14 +444,15 @@ def change_previous_end_caption(webvtt, start_caption):
microseconds=prev_end.microsecond,
).total_seconds()
if td_prev_end > start_caption:
webvtt.captions[-1].end = format_time_caption(start_caption)
webvtt.captions[-1].end = sec_to_timestamp(start_caption)


def format_time_caption(time_caption):
def sec_to_timestamp(total_seconds):
"""Format time for webvtt caption."""
return (
dt.datetime.utcfromtimestamp(0) + timedelta(seconds=float(time_caption))
).strftime("%H:%M:%S.%f")[:-3]
hours = int(total_seconds / 3600)
minutes = int(total_seconds / 60 - hours * 60)
seconds = total_seconds - hours * 3600 - minutes * 60
return '{:02d}:{:02d}:{:06.3f}'.format(hours, minutes, seconds)


def get_text_caption(text_caption, last_word_added):
Expand Down

0 comments on commit d79ba51

Please sign in to comment.