Coverage for pyspark/sql/tests/test_pandas_udf

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

# Licensed to the Apache Software Foundation (ASF) under one or more

# contributor license agreements. See the NOTICE file distributed with

# this work for additional information regarding copyright ownership.

# The ASF licenses this file to You under the Apache License, Version 2.0

# (the "License"); you may not use this file except in compliance with

# the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

import unittest

import inspect

from typing import Union, Iterator, Tuple

from pyspark.sql.functions import mean, lit

from pyspark.testing.sqlutils import ReusedSQLTestCase, \

have_pandas, have_pyarrow, pandas_requirement_message, \

pyarrow_requirement_message

from pyspark.sql.pandas.typehints import infer_eval_type

from pyspark.sql.pandas.functions import pandas_udf, PandasUDFType

from pyspark.sql import Row

if have_pandas:

import pandas as pd

import numpy as np

from pandas.testing import assert_frame_equal

@unittest.skipIf(

not have_pandas or not have_pyarrow,

pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type]

class PandasUDFTypeHintsTests(ReusedSQLTestCase):

def test_type_annotation_scalar(self):
    # Each probe below takes pandas Series/DataFrame parameters (positional,
    # *args, **kwargs, keyword-only, or a Union of both) and is annotated to
    # return a Series or DataFrame. Every one of them is asserted to be
    # inferred as PandasUDFType.SCALAR.
    #
    # Only the signatures matter: the bodies are never executed, the probes
    # are handed to inspect.signature() and nothing else.

    def series_to_series(col: pd.Series) -> pd.Series:
        pass

    def frame_and_series_to_frame(col: pd.DataFrame, col1: pd.Series) -> pd.DataFrame:
        pass

    def frame_with_varargs(col: pd.DataFrame, *args: pd.Series) -> pd.Series:
        pass

    def series_with_varargs_and_kwargs(
        col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame
    ) -> pd.Series:
        pass

    def series_with_keyword_only(col: pd.Series, *, col2: pd.DataFrame) -> pd.DataFrame:
        pass

    def union_with_keyword_only(
        col: Union[pd.Series, pd.DataFrame], *, col2: pd.DataFrame
    ) -> pd.Series:
        pass

    # Checked in the same order as they are declared above.
    probes = (
        series_to_series,
        frame_and_series_to_frame,
        frame_with_varargs,
        series_with_varargs_and_kwargs,
        series_with_keyword_only,
        union_with_keyword_only,
    )
    for probe in probes:
        inferred = infer_eval_type(inspect.signature(probe))
        self.assertEqual(inferred, PandasUDFType.SCALAR)

def test_type_annotation_scalar_iter(self):
    # Each probe takes a single Iterator parameter (of Series, or of a Tuple
    # of Series/DataFrame, including variadic Tuple[..., ...] forms) and is
    # annotated to return an Iterator of Series or DataFrame. Every one of
    # them is asserted to be inferred as PandasUDFType.SCALAR_ITER.
    #
    # The parameter name `iter` is kept as in the original signatures so the
    # inspected Signature objects are unchanged.

    def iter_series(iter: Iterator[pd.Series]) -> Iterator[pd.Series]:
        pass

    def iter_fixed_tuple(
        iter: Iterator[Tuple[pd.DataFrame, pd.Series]]
    ) -> Iterator[pd.DataFrame]:
        pass

    def iter_variadic_tuple(iter: Iterator[Tuple[pd.DataFrame, ...]]) -> Iterator[pd.Series]:
        pass

    def iter_variadic_union_tuple(
        iter: Iterator[Tuple[Union[pd.DataFrame, pd.Series], ...]]
    ) -> Iterator[pd.Series]:
        pass

    # Checked in declaration order.
    expected = PandasUDFType.SCALAR_ITER
    for probe in (
        iter_series,
        iter_fixed_tuple,
        iter_variadic_tuple,
        iter_variadic_union_tuple,
    ):
        signature = inspect.signature(probe)
        self.assertEqual(infer_eval_type(signature), expected)

def test_type_annotation_group_agg(self):
    # Each probe takes pandas Series/DataFrame parameters (positional,
    # *args, **kwargs, keyword-only, or a Union of both) but is annotated to
    # return a non-pandas type (str, int, Row, float). Every one of them is
    # asserted to be inferred as PandasUDFType.GROUPED_AGG.
    #
    # The bodies are never run; only inspect.signature() looks at the probes.

    def series_to_str(col: pd.Series) -> str:
        pass

    def frame_and_series_to_int(col: pd.DataFrame, col1: pd.Series) -> int:
        pass

    def frame_with_varargs_to_row(col: pd.DataFrame, *args: pd.Series) -> Row:
        pass

    def series_with_varargs_and_kwargs_to_str(
        col: pd.Series, *args: pd.Series, **kwargs: pd.DataFrame
    ) -> str:
        pass

    def series_with_keyword_only_to_float(col: pd.Series, *, col2: pd.DataFrame) -> float:
        pass

    def union_with_keyword_only_to_float(
        col: Union[pd.Series, pd.DataFrame], *, col2: pd.DataFrame
    ) -> float:
        pass

    # Checked in the same order as they are declared above.
    signatures = [
        inspect.signature(probe)
        for probe in (
            series_to_str,
            frame_and_series_to_int,
            frame_with_varargs_to_row,
            series_with_varargs_and_kwargs_to_str,
            series_with_keyword_only_to_float,
            union_with_keyword_only_to_float,
        )
    ]
    for signature in signatures:
        self.assertEqual(infer_eval_type(signature), PandasUDFType.GROUPED_AGG)

def test_type_annotation_negative(self):

def func(col: str) -> pd.Series:

pass

self.assertRaisesRegex(

NotImplementedError,

"Unsupported signature.*str",