Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 433fa97

Browse files
RubtsowaAlexanderKalistratov
authored and committed
Add functions for dataframe: median, mean, min, max, sum (#345)
* change * Impl functions for dataframe: median, mean, min, max, sum * add *args, **kwargs * change config and use TypeChecker * add functions: std, var, prod, count. Add tests for functions * change * change name input parameter * refactor * added change for methods median and min * correct input parameters * change * delete method count * change * correction function. problems with parameters for series methods * correction functions for Series and for DataFrame * correction problem with PEP8 * delete print * skip some tests * correction tests and Series methods * correction doc for df methods * correction test * delete 1 import
1 parent 1af37a2 commit 433fa97

File tree

4 files changed

+545
-63
lines changed

4 files changed

+545
-63
lines changed

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 341 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636

3737
from numba import types
3838
from numba.extending import (overload, overload_method, overload_attribute)
39-
from sdc.hiframes.pd_dataframe_type import DataFrameType
4039
from numba.errors import TypingError
40+
from sdc.hiframes.pd_dataframe_type import DataFrameType
4141

4242
from sdc.datatypes.hpat_pandas_series_functions import TypeChecker
4343
from sdc.hiframes.pd_dataframe_ext import get_dataframe_data
@@ -96,26 +96,360 @@ def sdc_pandas_dataframe_reduce_columns(df, func_name, params, ser_params):
9696
return _reduce_impl
9797

9898

99+
def check_type(name, df, axis=None, skipna=None, level=None, numeric_only=None, ddof=1, min_count=0):
    """
    Validate the argument types of a DataFrame reduction overload.

    Arguments are examined in signature order and the first one that is not
    accepted is reported through ``TypeChecker.raise_exc``.

    Parameters
    -----------
    name: :obj:`str`
        method name, interpolated into the checker message ``'Method {}().'``
    df:
        must pass ``TypeChecker.check`` against :class:`DataFrameType`
    axis, level, numeric_only:
        accepted only when None or an instance of ``types.Omitted``
    skipna:
        accepted when None or an instance of ``types.Omitted``,
        ``types.NoneType`` or ``types.Boolean``
    ddof:
        accepted only when an instance of ``types.Omitted`` or equal to 1
    min_count:
        accepted only when an instance of ``types.Omitted`` or equal to 0
    """

    checker = TypeChecker('Method {}().'.format(name))
    checker.check(df, DataFrameType)

    def is_omitted(arg):
        # True when the caller left the argument out of the call.
        return isinstance(arg, types.Omitted)

    if axis is not None and not is_omitted(axis):
        checker.raise_exc(axis, 'unsupported', 'axis')

    # skipna is the only argument for which a non-default (boolean) type is accepted.
    skipna_types = (types.Omitted, types.NoneType, types.Boolean)
    if skipna is not None and not isinstance(skipna, skipna_types):
        checker.raise_exc(skipna, 'bool', 'skipna')

    if level is not None and not is_omitted(level):
        checker.raise_exc(level, 'unsupported', 'level')

    if numeric_only is not None and not is_omitted(numeric_only):
        checker.raise_exc(numeric_only, 'unsupported', 'numeric_only')

    # The two arguments below have non-None defaults, so they are compared by value.
    if not (is_omitted(ddof) or ddof == 1):
        checker.raise_exc(ddof, 'unsupported', 'ddof')

    if not (is_omitted(min_count) or min_count == 0):
        checker.raise_exc(min_count, 'unsupported', 'min_count')
120+
121+
122+
@overload_method(DataFrameType, 'median')
def median_overload(df, axis=None, skipna=None, level=None, numeric_only=None):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.median` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_median*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    numeric_only:
        *unsupported* (must be omitted or None)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return the median of the values for the requested axis.
    """

    method = 'median'

    # Reject unsupported argument types before any implementation is built.
    check_type(method, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only)

    # Defaults of the DataFrame-level signature, in declaration order.
    df_defaults = dict(axis=None, skipna=None, level=None, numeric_only=None)
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    series_kwargs = dict(skipna='skipna', level='level')

    return sdc_pandas_dataframe_reduce_columns(df, method, df_defaults, series_kwargs)
158+
159+
160+
@overload_method(DataFrameType, 'mean')
def mean_overload(df, axis=None, skipna=None, level=None, numeric_only=None):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.mean` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_mean*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    numeric_only:
        *unsupported* (must be omitted or None)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return the mean of the values for the requested axis.
    """

    method = 'mean'

    # Reject unsupported argument types before any implementation is built.
    check_type(method, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only)

    # Defaults of the DataFrame-level signature, in declaration order.
    df_defaults = dict(axis=None, skipna=None, level=None, numeric_only=None)
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    series_kwargs = dict(skipna='skipna', level='level')

    return sdc_pandas_dataframe_reduce_columns(df, method, df_defaults, series_kwargs)
196+
197+
198+
@overload_method(DataFrameType, 'std')
def std_overload(df, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.std` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_std*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    ddof:
        *unsupported* (must be omitted or equal to 1)
    numeric_only:
        *unsupported* (must be omitted or None)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return sample standard deviation over requested axis.
    """

    method = 'std'

    # Reject unsupported argument types before any implementation is built.
    check_type(method, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, ddof=ddof)

    # Defaults of the DataFrame-level signature, in declaration order.
    df_defaults = dict(axis=None, skipna=None, level=None, ddof=1, numeric_only=None)
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    series_kwargs = dict(skipna='skipna', level='level', ddof='ddof')

    return sdc_pandas_dataframe_reduce_columns(df, method, df_defaults, series_kwargs)
236+
237+
238+
@overload_method(DataFrameType, 'var')
def var_overload(df, axis=None, skipna=None, level=None, ddof=1, numeric_only=None):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.var` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_var*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    ddof:
        *unsupported* (must be omitted or equal to 1)
    numeric_only:
        *unsupported* (must be omitted or None)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return unbiased variance over requested axis.
    """

    name = 'var'

    # Reject unsupported argument types before any implementation is built.
    check_type(name, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, ddof=ddof)

    # Defaults of the DataFrame-level signature, in declaration order.
    params = {'axis': None, 'skipna': None, 'level': None, 'ddof': 1, 'numeric_only': None}
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    ser_par = {'skipna': 'skipna', 'level': 'level', 'ddof': 'ddof'}

    return sdc_pandas_dataframe_reduce_columns(df, name, params, ser_par)
276+
277+
278+
@overload_method(DataFrameType, 'max')
def max_overload(df, axis=None, skipna=None, level=None, numeric_only=None):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.max` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_max*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    numeric_only:
        *unsupported* (must be omitted or None)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return the maximum of the values for the requested axis.
    """

    method = 'max'

    # Reject unsupported argument types before any implementation is built.
    check_type(method, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only)

    # Defaults of the DataFrame-level signature, in declaration order.
    df_defaults = dict(axis=None, skipna=None, level=None, numeric_only=None)
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    series_kwargs = dict(skipna='skipna', level='level')

    return sdc_pandas_dataframe_reduce_columns(df, method, df_defaults, series_kwargs)
314+
315+
316+
@overload_method(DataFrameType, 'min')
def min_overload(df, axis=None, skipna=None, level=None, numeric_only=None):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.min` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_min*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    numeric_only:
        *unsupported* (must be omitted or None)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return the minimum of the values for the requested axis.
    """

    method = 'min'

    # Reject unsupported argument types before any implementation is built.
    check_type(method, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only)

    # Defaults of the DataFrame-level signature, in declaration order.
    df_defaults = dict(axis=None, skipna=None, level=None, numeric_only=None)
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    series_kwargs = dict(skipna='skipna', level='level')

    return sdc_pandas_dataframe_reduce_columns(df, method, df_defaults, series_kwargs)
352+
353+
354+
@overload_method(DataFrameType, 'sum')
def sum_overload(df, axis=None, skipna=None, level=None, numeric_only=None, min_count=0):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.sum` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_sum*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    numeric_only:
        *unsupported* (must be omitted or None)
    min_count:
        *unsupported* (must be omitted or equal to 0)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return the sum of the values for the requested axis.
    """

    method = 'sum'

    # Reject unsupported argument types before any implementation is built.
    check_type(method, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, min_count=min_count)

    # Defaults of the DataFrame-level signature, in declaration order.
    df_defaults = dict(axis=None, skipna=None, level=None, numeric_only=None, min_count=0)
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    series_kwargs = dict(skipna='skipna', level='level', min_count='min_count')

    return sdc_pandas_dataframe_reduce_columns(df, method, df_defaults, series_kwargs)
392+
393+
394+
@overload_method(DataFrameType, 'prod')
def prod_overload(df, axis=None, skipna=None, level=None, numeric_only=None, min_count=0):
    """
    Pandas DataFrame method :meth:`pandas.DataFrame.prod` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_prod*

    Parameters
    -----------
    self: :class:`pandas.DataFrame`
        input arg
    axis:
        *unsupported* (must be omitted or None)
    skipna:
        omitted, None or a boolean type; other types are rejected by ``check_type``
    level:
        *unsupported* (must be omitted or None)
    numeric_only:
        *unsupported* (must be omitted or None)
    min_count:
        *unsupported* (must be omitted or equal to 0)

    Returns
    -------
    :obj:`pandas.Series` or `pandas.DataFrame`
        return the product of the values for the requested axis.
    """

    method = 'prod'

    # Reject unsupported argument types before any implementation is built.
    check_type(method, df, axis=axis, skipna=skipna, level=level, numeric_only=numeric_only, min_count=min_count)

    # Defaults of the DataFrame-level signature, in declaration order.
    df_defaults = dict(axis=None, skipna=None, level=None, numeric_only=None, min_count=0)
    # NOTE(review): presumably Series keyword -> DataFrame argument name to forward;
    # confirm against sdc_pandas_dataframe_reduce_columns.
    series_kwargs = dict(skipna='skipna', level='level', min_count='min_count')

    return sdc_pandas_dataframe_reduce_columns(df, method, df_defaults, series_kwargs)
432+
433+
99434
@overload_method(DataFrameType, 'count')
100435
def count_overload(df, axis=0, level=None, numeric_only=False):
101436
"""
102437
Pandas DataFrame method :meth:`pandas.DataFrame.count` implementation.
103438
104439
.. only:: developer
105440
106-
Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count
107-
Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count1
441+
Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_count*
108442
109443
Parameters
110444
-----------
111445
self: :class:`pandas.DataFrame`
112-
input arg
446+
input arg
113447
axis:
114-
*unsupported*
448+
*unsupported*
115449
level:
116-
*unsupported*
450+
*unsupported*
117451
numeric_only:
118-
*unsupported*
452+
*unsupported*
119453
120454
Returns
121455
-------

0 commit comments

Comments
 (0)