Skip to content

Commit fe1c3b6

Browse files
committed
feat: Add asynchronous document enhancement method and adjust minimum page content length
1 parent 2324482 commit fe1c3b6

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_confluence_loader.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@ def __init__(
8383
self._extractor_api = extractor_api
8484
self._rag_api = rag_api
8585
self._settings = settings
86-
self._sanitize_document_name()
8786
self._key_value_store = key_value_store
8887
self._information_mapper = information_mapper
8988
self._information_enhancer = information_enhancer
@@ -157,6 +156,13 @@ async def process_confluence(index):
157156
for t in threads:
158157
t.join()
159158

159+
async def _aenhance_langchain_documents(self, documents: list[Document]):
160+
try:
161+
return await self._information_enhancer.ainvoke(documents)
162+
except Exception as e:
163+
logger.error("Exception occured while enhancing confluence langchain document %s" % e)
164+
raise e
165+
160166
async def _delete_previous_information_pieces(self, index=0):
161167
try:
162168
await self._document_deleter.adelete_document(self._settings.document_name[index])

extractor-api-lib/src/extractor_api_lib/impl/api_endpoints/default_confluence_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
class DefaultConfluenceExtractor(ConfluenceExtractor):
1414
"""Default implementation of the FileExtractor interface."""
1515

16-
MIN_PAGE_CONTENT_LENGTH = 20
16+
MIN_PAGE_CONTENT_LENGTH = 10
1717

1818
def __init__(
1919
self,

0 commit comments

Comments
 (0)