Skip to content

Commit e6cb1c1

Browse files
monikakusterivicac
authored and committed
1018 - MistralAI - OCR
1 parent 11cb13d commit e6cb1c1

File tree

3 files changed

+136
-1
lines changed

3 files changed

+136
-1
lines changed

server/libs/modules/components/ai/llm/mistral/src/main/java/com/bytechef/component/ai/llm/mistral/MistralComponentHandler.java

+4-1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020

2121
import com.bytechef.component.ComponentHandler;
2222
import com.bytechef.component.ai.llm.mistral.action.MistralChatAction;
23+
import com.bytechef.component.ai.llm.mistral.action.MistralOcrAction;
2324
import com.bytechef.component.ai.llm.mistral.cluster.MistralAiChatModel;
2425
import com.bytechef.component.ai.llm.mistral.cluster.MistralAiEmbedding;
2526
import com.bytechef.component.ai.llm.mistral.connection.MistralConnection;
@@ -40,7 +41,9 @@ public class MistralComponentHandler implements ComponentHandler {
4041
.icon("path:assets/mistral.svg")
4142
.categories(ComponentCategory.ARTIFICIAL_INTELLIGENCE)
4243
.connection(MistralConnection.CONNECTION_DEFINITION)
43-
.actions(MistralChatAction.ACTION_DEFINITION)
44+
.actions(
45+
MistralChatAction.ACTION_DEFINITION,
46+
MistralOcrAction.ACTION_DEFINITION)
4447
.clusterElements(
4548
MistralAiEmbedding.CLUSTER_ELEMENT_DEFINITION,
4649
MistralAiChatModel.CLUSTER_ELEMENT_DEFINITION);
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Copyright 2025 ByteChef
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.bytechef.component.ai.llm.mistral.action;
18+
19+
import static com.bytechef.component.ai.llm.constant.LLMConstants.MODEL;
20+
import static com.bytechef.component.ai.llm.mistral.constant.MistralConstants.TYPE;
21+
import static com.bytechef.component.ai.llm.mistral.constant.MistralConstants.URL;
22+
import static com.bytechef.component.definition.ComponentDsl.action;
23+
import static com.bytechef.component.definition.ComponentDsl.array;
24+
import static com.bytechef.component.definition.ComponentDsl.integer;
25+
import static com.bytechef.component.definition.ComponentDsl.object;
26+
import static com.bytechef.component.definition.ComponentDsl.option;
27+
import static com.bytechef.component.definition.ComponentDsl.outputSchema;
28+
import static com.bytechef.component.definition.ComponentDsl.string;
29+
30+
import com.bytechef.component.definition.ComponentDsl.ModifiableActionDefinition;
31+
import com.bytechef.component.definition.Context;
32+
import com.bytechef.component.definition.Context.Http;
33+
import com.bytechef.component.definition.Parameters;
34+
import java.util.Map;
35+
36+
/**
37+
* @author Monika Kušter
38+
*/
39+
public class MistralOcrAction {
40+
41+
public enum DocumentType {
42+
43+
DOCUMENT_URL("document_url"),
44+
IMAGE_URL("image_url");
45+
46+
private final String value;
47+
48+
DocumentType(String value) {
49+
this.value = value;
50+
}
51+
52+
public String getValue() {
53+
return value;
54+
}
55+
}
56+
57+
public static final ModifiableActionDefinition ACTION_DEFINITION = action("ocr")
58+
.title("Document OCR")
59+
.description("Extracts text and structured content from documents.")
60+
.properties(
61+
string(MODEL)
62+
.label("Model")
63+
.description("Model to use.")
64+
.defaultValue("mistral-ocr-latest")
65+
.required(true),
66+
string(TYPE)
67+
.label("Type")
68+
.description("Type of the document to run OCR on.")
69+
.options(
70+
option("Image", DocumentType.IMAGE_URL.getValue()),
71+
option("PDF", DocumentType.DOCUMENT_URL.getValue()))
72+
.defaultValue(DocumentType.IMAGE_URL.getValue())
73+
.required(true),
74+
string(URL)
75+
.label("Image URL")
76+
.description("Url of the image to run OCR on.")
77+
.displayCondition("%s == '%s'".formatted(TYPE, DocumentType.IMAGE_URL.getValue()))
78+
.required(true),
79+
string(URL)
80+
.label("Document URL")
81+
.description("Url of the document to run OCR on.")
82+
.displayCondition("%s == '%s'".formatted(TYPE, DocumentType.DOCUMENT_URL.getValue()))
83+
.required(true))
84+
.output(
85+
outputSchema(
86+
object()
87+
.properties(
88+
array("pages")
89+
.items(
90+
object()
91+
.properties(
92+
integer("index"),
93+
string("markdown"),
94+
array("images")
95+
.items(
96+
object()
97+
.properties(
98+
string("id"),
99+
integer("top_left_x"),
100+
integer("top_left_y"),
101+
integer("bottom_right_x"),
102+
integer("bottom_right_y"))),
103+
object("dimensions")
104+
.properties(
105+
integer("dpi"),
106+
integer("height"),
107+
integer("width")))),
108+
string("model"),
109+
object("usage_info")
110+
.properties(
111+
integer("pages_processed"),
112+
integer("doc_size_bytes")))))
113+
.perform(MistralOcrAction::perform);
114+
115+
private MistralOcrAction() {
116+
}
117+
118+
public static Object perform(Parameters inputParameters, Parameters connectionParameters, Context context) {
119+
String type = inputParameters.getRequiredString(TYPE);
120+
121+
return context.http(http -> http.post("https://api.mistral.ai/v1/ocr"))
122+
.body(
123+
Http.Body.of(
124+
MODEL, inputParameters.getRequiredString(MODEL),
125+
"document", Map.of(TYPE, type, type, inputParameters.getRequiredString(URL))))
126+
.configuration(Http.responseType(Http.ResponseType.JSON))
127+
.execute()
128+
.getBody();
129+
}
130+
}

server/libs/modules/components/ai/llm/mistral/src/main/java/com/bytechef/component/ai/llm/mistral/constant/MistralConstants.java

+2
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@
3535
public final class MistralConstants {
3636

3737
public static final String SAFE_PROMPT = "safePrompt";
38+
public static final String TYPE = "type";
39+
public static final String URL = "url";
3840

3941
public static final List<Option<String>> CHAT_MODELS = ModelUtils.getEnumOptions(
4042
Arrays.stream(MistralAiApi.ChatModel.values())

0 commit comments

Comments
 (0)