Skip to content
This repository was archived by the owner on Apr 21, 2024. It is now read-only.

Commit bd22597

Browse files
committed
Don't summarize very short texts (#9)
1 parent 172c4c8 commit bd22597

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

src/jizt/config.py

+5
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,8 @@
4444
cast=Path,
4545
default=f"{ROOT_DIR}/language_detection/language_detection/models/lid.176.ftz"
4646
)
47+
48+
# Summarization params
49+
# Minimum number of words a text must contain to be summarized. This prevents
50+
# trying to summarize very short texts, which would yield poor results.
51+
MIN_WORDS_SOURCE: int = 20

src/jizt/summaries/views.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import logging
2424
from fastapi import (APIRouter, HTTPException, BackgroundTasks, Depends,
2525
Response, status)
26-
from jizt.config import LOG_LEVEL
26+
from jizt.config import LOG_LEVEL, MIN_WORDS_SOURCE
2727
from jizt.supported_languages import SupportedLanguage
2828
from jizt.language_detection.language_detection.language_detection import \
2929
LanguageDetectorSingleton
@@ -67,6 +67,12 @@ async def request_summary_view(
6767
if not request.source:
6868
raise HTTPException(status_code=status.HTTP_204_NO_CONTENT)
6969

70+
if len(request.source.split()) < MIN_WORDS_SOURCE:
71+
raise HTTPException(
72+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
73+
detail=f"source too short (<{MIN_WORDS_SOURCE} words)"
74+
)
75+
7076
language = lang_detector.detect(request.source).language
7177
if not SupportedLanguage.is_supported(language):
7278
raise HTTPException(

0 commit comments

Comments
 (0)