@@ -321,7 +321,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                 and args["y"]
                 and len(trace_data[[args["x"], args["y"]]].dropna()) > 1
             ):
-
                 # sorting is bad but trace_specs with "trendline" have no other attrs
                 sorted_trace_data = trace_data.sort_values(by=args["x"])
                 y = sorted_trace_data[args["y"]].values
@@ -562,7 +561,6 @@ def set_cartesian_axis_opts(args, axis, letter, orders):
 
 
 def configure_cartesian_marginal_axes(args, fig, orders):
-
     if "histogram" in [args["marginal_x"], args["marginal_y"]]:
         fig.layout["barmode"] = "overlay"
 
@@ -1064,14 +1062,14 @@ def _escape_col_name(df_input, col_name, extra):
     return col_name
 
 
-def to_unindexed_series(x):
+def to_unindexed_series(x, name=None):
    """
    assuming x is list-like or even an existing pd.Series, return a new pd.Series with
    no index, without extracting the data from an existing Series via numpy, which
    seems to mangle datetime columns. Stripping the index from existing pd.Series is
    required to get things to match up right in the new DataFrame we're building
    """
-    return pd.Series(x).reset_index(drop=True)
+    return pd.Series(x, name=name).reset_index(drop=True)
 
 
 def process_args_into_dataframe(args, wide_mode, var_name, value_name):
@@ -1086,9 +1084,12 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
     df_input = args["data_frame"]
     df_provided = df_input is not None
 
-    df_output = pd.DataFrame()
-    constants = dict()
-    ranges = list()
+    # we use a dict instead of a DataFrame directly so that repeatedly setting
+    # columns does not trigger a pandas PerformanceWarning.
+    # a dict is used instead of a list because columns need to be overwritten by name.
+    df_output = {}
+    constants = {}
+    ranges = []
     wide_id_vars = set()
     reserved_names = _get_reserved_col_names(args) if df_provided else set()
 
@@ -1099,7 +1100,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                 "No data were provided. Please provide data either with the `data_frame` or with the `dimensions` argument."
             )
         else:
-            df_output[df_input.columns] = df_input[df_input.columns]
+            df_output = {col: series for col, series in df_input.items()}
 
     # hover_data is a dict
     hover_data_is_dict = (
@@ -1140,7 +1141,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
     # argument_list and field_list ready, iterate over them
     # Core of the loop starts here
     for i, (argument, field) in enumerate(zip(argument_list, field_list)):
-        length = len(df_output)
+        length = len(df_output[next(iter(df_output))]) if len(df_output) else 0
         if argument is None:
             continue
         col_name = None
@@ -1181,11 +1182,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         % (
                             argument,
                             len(real_argument),
-                            str(list(df_output.columns)),
+                            str(list(df_output.keys())),
                             length,
                         )
                     )
-                df_output[col_name] = to_unindexed_series(real_argument)
+                df_output[col_name] = to_unindexed_series(real_argument, col_name)
             elif not df_provided:
                 raise ValueError(
                     "String or int arguments are only possible when a "
@@ -1214,13 +1215,15 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         % (
                             field,
                             len(df_input[argument]),
-                            str(list(df_output.columns)),
+                            str(list(df_output.keys())),
                             length,
                         )
                     )
                 else:
                     col_name = str(argument)
-                    df_output[col_name] = to_unindexed_series(df_input[argument])
+                    df_output[col_name] = to_unindexed_series(
+                        df_input[argument], col_name
+                    )
         # ----------------- argument is likely a column / array / list.... -------
         else:
             if df_provided and hasattr(argument, "name"):
@@ -1247,9 +1250,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                     "All arguments should have the same length. "
                     "The length of argument `%s` is %d, whereas the "
                     "length of previously-processed arguments %s is %d"
-                    % (field, len(argument), str(list(df_output.columns)), length)
+                    % (field, len(argument), str(list(df_output.keys())), length)
                 )
-            df_output[str(col_name)] = to_unindexed_series(argument)
+            df_output[str(col_name)] = to_unindexed_series(argument, str(col_name))
 
         # Finally, update argument with column name now that column exists
         assert col_name is not None, (
@@ -1267,12 +1270,19 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
             if field_name != "wide_variable":
                 wide_id_vars.add(str(col_name))
 
-    for col_name in ranges:
-        df_output[col_name] = range(len(df_output))
-
-    for col_name in constants:
-        df_output[col_name] = constants[col_name]
+    length = len(df_output[next(iter(df_output))]) if len(df_output) else 0
+    df_output.update(
+        {col_name: pd.Series(range(length), name=col_name) for col_name in ranges}
+    )
+    df_output.update(
+        {
+            # a constant is a single value: repeat it to full length to avoid NaNs when concatenating
+            col_name: pd.Series([constants[col_name]] * length, name=col_name)
+            for col_name in constants
+        }
+    )
 
+    df_output = pd.DataFrame(df_output)
 
     return df_output, wide_id_vars
 
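The change above replaces incremental column assignment on a DataFrame with a dict of index-free Series that is converted to a DataFrame once at the end. Below is a minimal, self-contained sketch of that pattern; the sample data and the local to_unindexed_series helper are illustrative only, not part of the diff.

import pandas as pd

def to_unindexed_series(x, name=None):
    # Strip any existing index so columns built from differently-indexed
    # Series line up positionally in the final DataFrame.
    return pd.Series(x, name=name).reset_index(drop=True)

# Collect columns in a plain dict rather than inserting them one by one
# into an existing DataFrame (repeated inserts are what pandas flags with
# a PerformanceWarning).
columns = {}
columns["x"] = to_unindexed_series(pd.Series([1, 2, 3], index=[10, 20, 30]), "x")
columns["y"] = to_unindexed_series([4.0, 5.0, 6.0], "y")

# Broadcast single-value "constants" to the common length so the final
# DataFrame does not end up with NaNs.
length = len(columns[next(iter(columns))]) if columns else 0
columns["label"] = pd.Series(["trace 0"] * length, name="label")

# Materialize the DataFrame exactly once.
df = pd.DataFrame(columns)
print(df)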