55import sys
66from io import BytesIO
77
8+ import numpy as np
89import requests
910from PIL import Image
10- import numpy as np
1111
1212from .image_downloader import ImageDownloader
1313
@@ -38,24 +38,24 @@ def ensure_installed(self, package_name):
3838
def preprocess_image(self, image: Image.Image) -> np.ndarray:
    """Convert a PIL image into an RGB NumPy array.

    Args:
        image: The PIL image to prepare.

    Returns:
        A NumPy array built from the RGB-converted image, with at least
        three dimensions (height, width, channels).
    """
    # Mode names are compared and passed as exact strings: a padded
    # value such as " RGB" never equals the image's mode and is not a
    # mode name ``convert`` accepts, so the literal must be "RGB".
    if image.mode != "RGB":
        image = image.convert("RGB")

    image_np = np.array(image)

    # Defensive guard: if the array still comes back 2-D (single channel),
    # add a channel axis and replicate it three times so the result is
    # always (height, width, 3).
    if image_np.ndim == 2:
        image_np = np.expand_dims(image_np, axis=-1)
        image_np = np.repeat(image_np, 3, axis=-1)

    return image_np
5353
5454 async def parse_image (self , image : Image .Image ) -> str :
5555 """Process w/ DonutProcessor and VisionEncoderDecoderModel"""
5656 # Preprocess the image
5757 image_np = self .preprocess_image (image )
58-
58+
5959 task_prompt = "<s_cord-v2>"
6060 decoder_input_ids = self .processor .tokenizer (
6161 task_prompt , add_special_tokens = False , return_tensors = "pt"
@@ -93,4 +93,4 @@ def download_image(self, url: str) -> Image.Image:
9393 """Download an image from URL."""
9494 response = requests .get (url )
9595 image = Image .open (BytesIO (response .content ))
96- return image
96+ return image
0 commit comments