Skip to content
This repository was archived by the owner on Nov 22, 2022. It is now read-only.

Commit b555fdb

Browse files
committed
Use Pandas Protocols for udfs
1 parent c599954 commit b555fdb

File tree

3 files changed

+21
-19
lines changed

3 files changed

+21
-19
lines changed

test-data/unit/sql-udf.test

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
[case scalarUDF]
22
from pyspark.sql.pandas.functions import pandas_udf, PandasUDFType
3+
import pandas.core.series # type: ignore[import]
34

45
@pandas_udf("str", PandasUDFType.SCALAR)
5-
def f(x: str) -> str:
6+
def f(x: pandas.core.series.Series) -> pandas.core.series.Series:
67
return x
78

89
@pandas_udf("str", PandasUDFType.SCALAR)
9-
def g(x: str, y: str) -> str:
10+
def g(x: pandas.core.series.Series, y: pandas.core.series.Series) -> pandas.core.series.Series:
1011
return x
1112

1213
@pandas_udf("str", PandasUDFType.SCALAR)
13-
def h(*xs: str) -> str:
14+
def h(*xs: pandas.core.series.Series) -> pandas.core.series.Series:
1415
return xs[0]
1516

1617
pandas_udf(lambda x: x, "str", PandasUDFType.SCALAR)

third_party/3/pyspark/sql/_typing.pyi

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import pyspark.sql.column
99
import pyspark.sql.types
1010
from pyspark.sql.column import Column
1111

12+
from pyspark.sql.pandas._typing import DataFrameLike, SeriesLike
1213
import pandas.core.frame # type: ignore
1314
import pandas.core.series # type: ignore
1415

@@ -41,30 +42,30 @@ PandasGroupedAggUDFType = Literal[202]
4142
PandasMapIterUDFType = Literal[205]
4243

4344
class PandasVariadicScalarToScalarFunction(Protocol):
44-
def __call__(self, *_: pandas.core.series.Series) -> pandas.core.series.Series:
45+
def __call__(self, *_: SeriesLike) -> SeriesLike:
4546
...
4647

47-
PandasScalarToScalarFunction = Union[PandasVariadicScalarToScalarFunction, Callable[[pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series]]
48+
PandasScalarToScalarFunction = Union[PandasVariadicScalarToScalarFunction, Callable[[SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], SeriesLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], SeriesLike]]
4849

4950
class PandasVariadicScalarToStructFunction(Protocol):
50-
def __call__(self, *_: pandas.core.series.Series) -> pandas.core.frame.DataFrame:
51+
def __call__(self, *_: SeriesLike) -> DataFrameLike:
5152
...
5253

53-
PandasScalarToStructFunction = Union[PandasVariadicScalarToStructFunction, Callable[[pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame]]
54+
PandasScalarToStructFunction = Union[PandasVariadicScalarToStructFunction, Callable[[SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], DataFrameLike], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], DataFrameLike]]
5455

55-
PandasScalarIterFunction = Callable[[Iterable[Union[pandas.core.series.Series, Tuple[pandas.core.series.Series, ...], pandas.core.frame.DataFrame]]], Iterable[pandas.core.series.Series]]
56+
PandasScalarIterFunction = Callable[[Iterable[Union[SeriesLike, Tuple[SeriesLike, ...], DataFrameLike]]], Iterable[SeriesLike]]
5657

57-
PandasGroupedMapFunction = Union[Callable[[pandas.core.frame.DataFrame], pandas.core.frame.DataFrame], Callable[[Any, pandas.core.frame.DataFrame], pandas.core.frame.DataFrame]]
58+
PandasGroupedMapFunction = Union[Callable[[DataFrameLike], DataFrameLike], Callable[[Any, DataFrameLike], DataFrameLike]]
5859

5960
class PandasVariadicGroupedAggFunction(Protocol):
60-
def __call__(self, *_: pandas.core.series.Series) -> LiteralType:
61+
def __call__(self, *_: SeriesLike) -> LiteralType:
6162
...
6263

63-
PandasGroupedAggFunction = Union[Callable[[pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], LiteralType], PandasVariadicGroupedAggFunction]
64+
PandasGroupedAggFunction = Union[Callable[[SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], PandasVariadicGroupedAggFunction]
6465

65-
PandasMapIterFunction = Callable[[Iterable[pandas.core.frame.DataFrame]], Iterable[pandas.core.frame.DataFrame]]
66+
PandasMapIterFunction = Callable[[Iterable[DataFrameLike]], Iterable[DataFrameLike]]
6667

67-
PandasCogroupedMapFunction = Callable[[pandas.core.frame.DataFrame, pandas.core.frame.DataFrame], pandas.core.frame.DataFrame]
68+
PandasCogroupedMapFunction = Callable[[DataFrameLike, DataFrameLike], DataFrameLike]
6869

6970
class UserDefinedFunctionLike(Protocol):
7071
def __call__(self, *_: ColumnOrName) -> Column:

third_party/3/pyspark/sql/pandas/functions.pyi

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,20 @@ class PandasUDFType:
2020
@overload
2121
def pandas_udf(f: PandasScalarToScalarFunction, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> UserDefinedFunctionLike: ...
2222
@overload
23-
def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ...
23+
def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc]
2424
@overload
25-
def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ...
25+
def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc]
2626
@overload
27-
def pandas_udf(*, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ...
27+
def pandas_udf(*, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc]
2828

2929
@overload
3030
def pandas_udf(f: PandasScalarToStructFunction, returnType: Union[StructType, str], functionType: PandasScalarUDFType) -> UserDefinedFunctionLike: ...
3131
@overload
32-
def pandas_udf(f: Union[StructType, str], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ...
32+
def pandas_udf(f: Union[StructType, str], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc]
3333
@overload
34-
def pandas_udf(f: Union[StructType, str], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ...
34+
def pandas_udf(f: Union[StructType, str], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc]
3535
@overload
36-
def pandas_udf(*, returnType: Union[StructType, str], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ...
36+
def pandas_udf(*, returnType: Union[StructType, str], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc]
3737

3838
@overload
3939
def pandas_udf(f: PandasScalarIterFunction, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarIterUDFType) -> UserDefinedFunctionLike: ...

0 commit comments

Comments
 (0)