6
6
7
7
import numpy as np
8
8
import pandas as pd
9
- from pandas .core .tools .datetimes import _guess_datetime_format_for_array
10
9
10
+ from sdmetrics ._utils_metadata import _convert_datetime_column
11
11
from sdmetrics .utils import (
12
12
discretize_column ,
13
13
get_alternate_keys ,
14
14
get_columns_from_metadata ,
15
15
get_type_from_column_meta ,
16
- is_datetime ,
17
16
)
18
17
19
18
CONTINUOUS_SDTYPES = ['numerical' , 'datetime' ]
@@ -35,51 +34,6 @@ class PlotConfig:
35
34
FONT_SIZE = 18
36
35
37
36
38
- def convert_to_datetime (column_data , datetime_format = None ):
39
- """Convert a column data to pandas datetime.
40
-
41
- Args:
42
- column_data (pandas.Series):
43
- The column data
44
- format (str):
45
- Optional string format of datetime. If ``None``, will attempt to infer the datetime
46
- format from the column data. Defaults to ``None``.
47
-
48
- Returns:
49
- pandas.Series:
50
- The converted column data.
51
- """
52
- if is_datetime (column_data ):
53
- return column_data
54
-
55
- if datetime_format is None :
56
- datetime_format = _guess_datetime_format_for_array (column_data .astype (str ).to_numpy ())
57
-
58
- return pd .to_datetime (column_data , format = datetime_format )
59
-
60
-
61
- def convert_datetime_columns (real_column , synthetic_column , col_metadata ):
62
- """Convert a real and a synthetic column to pandas datetime.
63
-
64
- Args:
65
- real_data (pandas.Series):
66
- The real column data
67
- synthetic_column (pandas.Series):
68
- The synthetic column data
69
- col_metadata:
70
- The metadata associated with the column
71
-
72
- Returns:
73
- (pandas.Series, pandas.Series):
74
- The converted real and synthetic column data.
75
- """
76
- datetime_format = col_metadata .get ('format' ) or col_metadata .get ('datetime_format' )
77
- return (
78
- convert_to_datetime (real_column , datetime_format ),
79
- convert_to_datetime (synthetic_column , datetime_format ),
80
- )
81
-
82
-
83
37
def discretize_table_data (real_data , synthetic_data , metadata ):
84
38
"""Create a copy of the real and synthetic data with discretized data.
85
39
@@ -109,10 +63,8 @@ def discretize_table_data(real_data, synthetic_data, metadata):
109
63
real_col = real_data [column_name ]
110
64
synthetic_col = synthetic_data [column_name ]
111
65
if sdtype == 'datetime' :
112
- datetime_format = column_meta .get ('format' ) or column_meta .get ('datetime_format' )
113
- if real_col .dtype == 'O' and datetime_format :
114
- real_col = pd .to_datetime (real_col , format = datetime_format )
115
- synthetic_col = pd .to_datetime (synthetic_col , format = datetime_format )
66
+ real_col = _convert_datetime_column (column_name , real_col , column_meta )
67
+ synthetic_col = _convert_datetime_column (column_name , synthetic_col , column_meta )
116
68
117
69
real_col = pd .to_numeric (real_col )
118
70
synthetic_col = pd .to_numeric (synthetic_col )
0 commit comments