Skip to content

Commit

Permalink
[ArrowStringArray] CLN: remove hasattr checks (pandas-dev#41327)
Browse files Browse the repository at this point in the history
  • Loading branch information
simonjayhawkins authored May 5, 2021
1 parent b8f8955 commit a43c42c
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 55 deletions.
10 changes: 10 additions & 0 deletions pandas/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
np_version_under1p19,
np_version_under1p20,
)
from pandas.compat.pyarrow import (
pa_version_under1p0,
pa_version_under2p0,
pa_version_under3p0,
pa_version_under4p0,
)

PY38 = sys.version_info >= (3, 8)
PY39 = sys.version_info >= (3, 9)
Expand Down Expand Up @@ -136,4 +142,8 @@ def get_lzma_file(lzma):
"np_version_under1p18",
"np_version_under1p19",
"np_version_under1p20",
"pa_version_under1p0",
"pa_version_under2p0",
"pa_version_under3p0",
"pa_version_under4p0",
]
18 changes: 18 additions & 0 deletions pandas/compat/pyarrow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
""" support pyarrow compatibility across versions """

from distutils.version import LooseVersion

try:
import pyarrow as pa

_pa_version = pa.__version__
_palv = LooseVersion(_pa_version)
pa_version_under1p0 = _palv < LooseVersion("1.0.0")
pa_version_under2p0 = _palv < LooseVersion("2.0.0")
pa_version_under3p0 = _palv < LooseVersion("3.0.0")
pa_version_under4p0 = _palv < LooseVersion("4.0.0")
except ImportError:
pa_version_under1p0 = True
pa_version_under2p0 = True
pa_version_under3p0 = True
pa_version_under4p0 = True
105 changes: 50 additions & 55 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
Scalar,
type_t,
)
from pandas.compat import (
pa_version_under2p0,
pa_version_under3p0,
pa_version_under4p0,
)
from pandas.util._decorators import doc
from pandas.util._validators import validate_fillna_kwargs

Expand Down Expand Up @@ -667,9 +672,7 @@ def take(
return type(self)(self._data.take(indices_array))

def isin(self, values):

# pyarrow.compute.is_in added in pyarrow 2.0.0
if not hasattr(pc, "is_in"):
if pa_version_under2p0:
return super().isin(values)

value_set = [
Expand All @@ -684,7 +687,7 @@ def isin(self, values):
return np.zeros(len(self), dtype=bool)

kwargs = {}
if LooseVersion(pa.__version__) < "3.0.0":
if pa_version_under3p0:
# in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises
# with unexpected keyword argument in pyarrow 3.0.0+
kwargs["skip_null"] = True
Expand Down Expand Up @@ -802,11 +805,10 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
return super()._str_contains(pat, case, flags, na, regex)

if regex:
# match_substring_regex added in pyarrow 4.0.0
if hasattr(pc, "match_substring_regex") and case:
result = pc.match_substring_regex(self._data, pat)
else:
if pa_version_under4p0 or case is False:
return super()._str_contains(pat, case, flags, na, regex)
else:
result = pc.match_substring_regex(self._data, pat)
else:
if case:
result = pc.match_substring(self._data, pat)
Expand All @@ -818,27 +820,25 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
return result

def _str_startswith(self, pat, na=None):
# match_substring_regex added in pyarrow 4.0.0
if hasattr(pc, "match_substring_regex"):
result = pc.match_substring_regex(self._data, "^" + re.escape(pat))
result = BooleanDtype().__from_arrow__(result)
if not isna(na):
result[isna(result)] = bool(na)
return result
else:
if pa_version_under4p0:
return super()._str_startswith(pat, na)

result = pc.match_substring_regex(self._data, "^" + re.escape(pat))
result = BooleanDtype().__from_arrow__(result)
if not isna(na):
result[isna(result)] = bool(na)
return result

def _str_endswith(self, pat, na=None):
# match_substring_regex added in pyarrow 4.0.0
if hasattr(pc, "match_substring_regex"):
result = pc.match_substring_regex(self._data, re.escape(pat) + "$")
result = BooleanDtype().__from_arrow__(result)
if not isna(na):
result[isna(result)] = bool(na)
return result
else:
if pa_version_under4p0:
return super()._str_endswith(pat, na)

result = pc.match_substring_regex(self._data, re.escape(pat) + "$")
result = BooleanDtype().__from_arrow__(result)
if not isna(na):
result[isna(result)] = bool(na)
return result

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
):
Expand Down Expand Up @@ -871,13 +871,12 @@ def _str_isnumeric(self):
return BooleanDtype().__from_arrow__(result)

def _str_isspace(self):
# utf8_is_space added in pyarrow 2.0.0
if hasattr(pc, "utf8_is_space"):
result = pc.utf8_is_space(self._data)
return BooleanDtype().__from_arrow__(result)
else:
if pa_version_under2p0:
return super()._str_isspace()

result = pc.utf8_is_space(self._data)
return BooleanDtype().__from_arrow__(result)

def _str_istitle(self):
result = pc.utf8_is_title(self._data)
return BooleanDtype().__from_arrow__(result)
Expand All @@ -887,48 +886,44 @@ def _str_isupper(self):
return BooleanDtype().__from_arrow__(result)

def _str_len(self):
# utf8_length added in pyarrow 4.0.0
if hasattr(pc, "utf8_length"):
result = pc.utf8_length(self._data)
return Int64Dtype().__from_arrow__(result)
else:
if pa_version_under4p0:
return super()._str_len()

result = pc.utf8_length(self._data)
return Int64Dtype().__from_arrow__(result)

def _str_lower(self):
return type(self)(pc.utf8_lower(self._data))

def _str_upper(self):
return type(self)(pc.utf8_upper(self._data))

def _str_strip(self, to_strip=None):
if pa_version_under4p0:
return super()._str_strip(to_strip)

if to_strip is None:
# utf8_trim_whitespace added in pyarrow 4.0.0
if hasattr(pc, "utf8_trim_whitespace"):
return type(self)(pc.utf8_trim_whitespace(self._data))
result = pc.utf8_trim_whitespace(self._data)
else:
# utf8_trim added in pyarrow 4.0.0
if hasattr(pc, "utf8_trim"):
return type(self)(pc.utf8_trim(self._data, characters=to_strip))
return super()._str_strip(to_strip)
result = pc.utf8_trim(self._data, characters=to_strip)
return type(self)(result)

def _str_lstrip(self, to_strip=None):
if pa_version_under4p0:
return super()._str_lstrip(to_strip)

if to_strip is None:
# utf8_ltrim_whitespace added in pyarrow 4.0.0
if hasattr(pc, "utf8_ltrim_whitespace"):
return type(self)(pc.utf8_ltrim_whitespace(self._data))
result = pc.utf8_ltrim_whitespace(self._data)
else:
# utf8_ltrim added in pyarrow 4.0.0
if hasattr(pc, "utf8_ltrim"):
return type(self)(pc.utf8_ltrim(self._data, characters=to_strip))
return super()._str_lstrip(to_strip)
result = pc.utf8_ltrim(self._data, characters=to_strip)
return type(self)(result)

def _str_rstrip(self, to_strip=None):
if pa_version_under4p0:
return super()._str_rstrip(to_strip)

if to_strip is None:
# utf8_rtrim_whitespace added in pyarrow 4.0.0
if hasattr(pc, "utf8_rtrim_whitespace"):
return type(self)(pc.utf8_rtrim_whitespace(self._data))
result = pc.utf8_rtrim_whitespace(self._data)
else:
# utf8_rtrim added in pyarrow 4.0.0
if hasattr(pc, "utf8_rtrim"):
return type(self)(pc.utf8_rtrim(self._data, characters=to_strip))
return super()._str_rstrip(to_strip)
result = pc.utf8_rtrim(self._data, characters=to_strip)
return type(self)(result)

0 comments on commit a43c42c

Please sign in to comment.