11from asyncio import Protocol
22from pathlib import Path
3- from typing import Dict , Tuple
3+ from typing import Tuple , Optional , TypedDict
44
55from tarsier ._utils import load_js
66from tarsier .adapter import AnyDriver , BrowserAdapter , adapter_factory
77from tarsier .ocr import OCRService
88from tarsier .text_format import format_text
99
10- TagToXPath = Dict [int , str ]
10+
class TagMetadata(TypedDict):
    """Metadata for a single tag produced when a page is tagged.

    Instances are built from the per-tag objects returned by the in-page
    ``window.tagifyWebpage`` call; each field is copied from the
    camelCase key of the same name (e.g. ``tarsier_id`` <- ``tarsierId``).
    """

    # Copied from "tarsierId".
    tarsier_id: int
    # Copied from "elementName" (presumably the tagged element's tag name
    # -- TODO confirm against the JS side).
    element_name: str
    # Copied from "openingTagHTML".
    opening_tag_html: str
    # Copied from "xpath".
    xpath: str
    # Copied from "elementText"; None when the JS result omits the key.
    element_text: Optional[str]
    # Copied from "textNodeIndex"; None when the JS result omits the key.
    text_node_index: Optional[int]
    # Copied from "idSymbol".
    id_symbol: str
    # Copied from "idString".
    id_string: str
1120
1221
class ITarsier(Protocol):
    """Interface for objects that turn a browser page into an image and/or text.

    Every method takes a browser driver and returns its payload together
    with a list of ``TagMetadata`` entries. The methods here are
    placeholders that raise ``NotImplementedError``; implementations are
    expected to override them.

    NOTE(review): ``Protocol`` is imported from ``asyncio`` at the top of
    this file, not from ``typing``, so this is an ordinary base class
    rather than a structural typing protocol -- confirm this is intended.
    """

    async def page_to_image(
        self, driver: AnyDriver
    ) -> Tuple[bytes, list[TagMetadata]]:
        """Return the page as image bytes plus the metadata of its tags."""
        raise NotImplementedError()

    async def page_to_text(
        self, driver: AnyDriver
    ) -> Tuple[str, list[TagMetadata]]:
        """Return the page as text plus the metadata of its tags."""
        raise NotImplementedError()

    async def page_to_image_and_text(
        self, driver: AnyDriver
    ) -> Tuple[bytes, str, list[TagMetadata]]:
        """Return image bytes, text and tag metadata for the page."""
        raise NotImplementedError()
1933
2034
@@ -31,10 +45,10 @@ async def page_to_image(
3145 tag_text_elements : bool = False ,
3246 tagless : bool = False ,
3347 keep_tags_showing : bool = False ,
34- ) -> Tuple [bytes , TagToXPath ]:
48+ ) -> Tuple [bytes , list [ TagMetadata ] ]:
3549 adapter = adapter_factory (driver )
3650 tag_to_xpath = (
37- await self ._tag_page (adapter , tag_text_elements ) if not tagless else {}
51+ await self ._tag_page (adapter , tag_text_elements ) if not tagless else []
3852 )
3953 if tagless :
4054 await self ._remove_tags (adapter )
@@ -44,15 +58,15 @@ async def page_to_image(
4458 if not keep_tags_showing :
4559 await self ._remove_tags (adapter )
4660
47- return screenshot , tag_to_xpath if not tagless else {}
61+ return screenshot , tag_to_xpath if not tagless else []
4862
4963 async def page_to_text (
5064 self ,
5165 driver : AnyDriver ,
5266 tag_text_elements : bool = False ,
5367 tagless : bool = False ,
5468 keep_tags_showing : bool = False ,
55- ) -> Tuple [str , TagToXPath ]:
69+ ) -> Tuple [str , list [ TagMetadata ] ]:
5670 image , tag_to_xpath = await self .page_to_image (
5771 driver , tag_text_elements , tagless , keep_tags_showing
5872 )
@@ -65,7 +79,7 @@ async def page_to_image_and_text(
6579 tag_text_elements : bool = False ,
6680 tagless : bool = False ,
6781 keep_tags_showing : bool = False ,
68- ) -> Tuple [bytes , str , TagToXPath ]:
82+ ) -> Tuple [bytes , str , list [ TagMetadata ] ]:
6983 image , tag_to_xpath = await self .page_to_image (
7084 driver , tag_text_elements , tagless , keep_tags_showing
7185 )
@@ -90,13 +104,26 @@ def _run_ocr(self, image: bytes) -> str:
90104
async def _tag_page(
    self, adapter: BrowserAdapter, tag_text_elements: bool = False
) -> list[TagMetadata]:
    """Tag the current page and return metadata for every tag created.

    Loads the Tarsier JS utilities into the page, runs
    ``window.tagifyWebpage`` and converts each object it returns into a
    ``TagMetadata`` dict with snake_case keys.

    Args:
        adapter: Browser adapter used to execute JavaScript in the page.
        tag_text_elements: Forwarded to ``tagifyWebpage`` as a JS boolean
            literal.

    Returns:
        One ``TagMetadata`` per object returned by the script, in the
        order the script returned them.

    Raises:
        KeyError: If a returned object lacks one of the required keys
            (``tarsierId``, ``elementName``, ``openingTagHTML``,
            ``xpath``, ``idSymbol``, ``idString``).
    """
    await self._load_tarsier_utils(adapter)

    # Python's True/False must be rendered as JS true/false.
    script = f"return window.tagifyWebpage({str(tag_text_elements).lower()});"
    raw_tags = await adapter.run_js(script)

    return [
        TagMetadata(
            tarsier_id=raw["tarsierId"],
            element_name=raw["elementName"],
            opening_tag_html=raw["openingTagHTML"],
            xpath=raw["xpath"],
            # These two keys may be absent from the JS result; default to None.
            element_text=raw.get("elementText"),
            text_node_index=raw.get("textNodeIndex"),
            id_symbol=raw["idSymbol"],
            id_string=raw["idString"],
        )
        for raw in raw_tags
    ]
100127
101128 async def _remove_tags (self , adapter : BrowserAdapter ) -> None :
102129 await self ._load_tarsier_utils (adapter )
0 commit comments