11# -*- coding: utf-8 -*-
22import re
33import string
4- from typing import Callable , Optional , Pattern , List , Tuple
4+ from typing import Callable , Match , Optional , Pattern , List , Tuple
55from decimal import Decimal , InvalidOperation
66
77import attr
@@ -36,14 +36,15 @@ def fromstring(cls, price: Optional[str],
3636 ``price`` string, it could be **preferred** over a value extracted
3737 from ``currency_hint`` string.
3838 """
39- amount_text = extract_price_text (price ) if price is not None else None
39+ currency , source = _extract_currency_symbol (price , currency_hint )
40+ amount_text = extract_price_text (price , currency if source == price else None ) if price is not None else None
4041 amount_num = parse_number (amount_text ) if amount_text is not None else None
41- currency = extract_currency_symbol (price , currency_hint )
4242 if currency is not None :
43- currency = currency .strip ()
43+ currency = currency .group (0 ).strip ()
44+ assert isinstance (currency , str )
4445 return Price (
4546 amount = amount_num ,
46- currency = currency ,
47+ currency = currency , # type: ignore
4748 amount_text = amount_text ,
4849 )
4950
@@ -120,11 +121,11 @@ def or_regex(symbols: List[str]) -> Pattern:
120121_search_unsafe_currency = or_regex (OTHER_CURRENCY_SYMBOLS ).search
121122
122123
123- def extract_currency_symbol (price : Optional [str ],
124- currency_hint : Optional [str ]) -> Optional [str ]:
124+ def _extract_currency_symbol (price : Optional [str ], currency_hint : Optional [str ]) -> Tuple [Optional [Match ], Optional [str ]]:
125125 """
126- Guess currency symbol from extracted price and currency strings.
127- Return an empty string if symbol is not found.
126+ Guess the currency symbol from extracted price and currency strings.
127+ Return a (`match object`_, source_string) tuple with the symbol found and
128+ the string where it was found, or (None, None) if no symbol is found.
128129 """
129130 methods : List [Tuple [Callable , Optional [str ]]] = [
130131 (_search_safe_currency , price ),
@@ -142,17 +143,32 @@ def extract_currency_symbol(price: Optional[str],
142143 for meth , attr in methods :
143144 m = meth (attr ) if attr else None
144145 if m :
145- return m .group (0 )
146+ return m , attr
147+
148+ return None , None
146149
150+
def extract_currency_symbol(price: Optional[str],
                            currency_hint: Optional[str]) -> Optional[str]:
    """
    Guess the currency symbol from the extracted price and currency strings.

    This is a thin wrapper around ``_extract_currency_symbol``: the lookup
    itself is delegated to that helper, and only the matched text is exposed
    here (the source string the helper also reports is discarded).

    Return the symbol exactly as it was matched, or None when the helper
    finds no symbol.
    """
    found, _source = _extract_currency_symbol(price, currency_hint)
    # ``group(0)`` is the whole matched substring, i.e. the symbol as found.
    return found.group(0) if found else None
148161
149162
150- def extract_price_text (price : str ) -> Optional [str ]:
163+ def extract_price_text (price : str , currency_match : Optional [ Match ] = None ) -> Optional [str ]:
151164 """
152165 Extract text of a price from a string which contains price and
153- maybe some other text. If multiple price-looking substrings are present,
154- the first is returned (FIXME: it is better to return a number
155- which is near a currency symbol).
166+ maybe some other text.
167+
168+ If a match object of the currency within the `price` string is provided,
169+ amounts before or after the matched currency substring are prioritized.
170+ Otherwise, if multiple price-looking substrings are present, the first is
171+ returned.
156172
157173 >>> extract_price_text("price: $12.99")
158174 '12.99'
@@ -189,16 +205,39 @@ def extract_price_text(price: str) -> Optional[str]:
189205 """ , price , re .VERBOSE )
190206 if m :
191207 return m .group (0 ).replace (' ' , '' )
208+
209+ def number_from_match (m ):
210+ return m .group (1 ).strip (',.' ).strip ()
211+
212+ if currency_match is not None :
213+
214+ m = re .search (r"""
215+ (\d[\d\s.,]*) # number, probably with thousand separators
216+ \s*$ # only match right before the currency symbol
217+ """ , price [:currency_match .start (0 )], re .VERBOSE )
218+ if m :
219+ return number_from_match (m )
220+
221+ m = re .search (r"""
222+ ^\s* # only match right after the currency symbol
223+ (\d[\d\s.,]*) # number, probably with thousand separators
224+ \s* # skip whitespace
225+ (?:[^%\d]|$) # capture next symbol - it shouldn't be %
226+ """ , price [currency_match .end (0 ):], re .VERBOSE )
227+ if m :
228+ return number_from_match (m )
229+
192230 m = re .search (r"""
193231 (\d[\d\s.,]*) # number, probably with thousand separators
194232 \s* # skip whitespace
195233 (?:[^%\d]|$) # capture next symbol - it shouldn't be %
196234 """ , price , re .VERBOSE )
197-
198235 if m :
199- return m .group (1 ).strip (',.' ).strip ()
236+ return number_from_match (m )
237+
200238 if 'free' in price .lower ():
201239 return '0'
240+
202241 return None
203242
204243
0 commit comments