@@ -327,10 +327,14 @@ def pivot_longer(
327
327
Should be either a single column name, or a list/tuple of
328
328
column names.
329
329
`index` should be a list of tuples if the columns are a MultiIndex.
330
+ Column selection is possible using the
331
+ [`select`][janitor.functions.select.select] syntax.
330
332
column_names: Name(s) of columns to unpivot. Should be either
331
333
a single column name or a list/tuple of column names.
332
334
`column_names` should be a list of tuples
333
335
if the columns are a MultiIndex.
336
+ Column selection is possible using the
337
+ [`select`][janitor.functions.select.select] syntax.
334
338
names_to: Name of new column as a string that will contain
335
339
what were previously the column names in `column_names`.
336
340
The default is `variable` if no value is provided. It can
@@ -420,10 +424,13 @@ def pivot_longer_spec(
420
424
) -> pd .DataFrame :
421
425
"""A declarative interface to pivot a DataFrame from wide to long form,
422
426
where you describe how the data will be unpivoted,
423
- using a DataFrame. This gives you, the user,
427
+ using a DataFrame.
428
+
429
+ This gives you, the user,
424
430
more control over unpivoting, where you create a “spec”
425
431
data frame that describes exactly how data stored
426
432
in the column names becomes variables.
433
+
427
434
It can come in handy for situations where
428
435
[`pivot_longer`][janitor.functions.pivot.pivot_longer]
429
436
seems inadequate for the transformation.
@@ -2380,3 +2387,151 @@ def _check_tuples_multiindex(indexer, args, param):
2380
2387
)
2381
2388
2382
2389
return args
2390
+
2391
+
2392
def pivot_wider_spec(
    df: pd.DataFrame,
    spec: pd.DataFrame,
    index: list | tuple | str | Pattern = None,
    reset_index: bool = True,
) -> pd.DataFrame:
    """A declarative interface to pivot a DataFrame from long to wide form,
    where you describe how the data will be pivoted,
    using a DataFrame.

    This gives you, the user,
    more control over pivoting, where you create a “spec”
    data frame that describes exactly which output column
    each combination of value column and pivoted column value(s)
    ends up in.

    It can come in handy for situations where
    `pd.DataFrame.pivot`
    seems inadequate for the transformation.

    !!! info "New in version 0.31.0"

    Examples:
        >>> import pandas as pd
        >>> from janitor import pivot_wider_spec
        >>> df = pd.DataFrame(
        ...     [
        ...         {"famid": 1, "birth": 1, "age": 1, "ht": 2.8},
        ...         {"famid": 1, "birth": 1, "age": 2, "ht": 3.4},
        ...         {"famid": 1, "birth": 2, "age": 1, "ht": 2.9},
        ...         {"famid": 1, "birth": 2, "age": 2, "ht": 3.8},
        ...         {"famid": 1, "birth": 3, "age": 1, "ht": 2.2},
        ...         {"famid": 1, "birth": 3, "age": 2, "ht": 2.9},
        ...         {"famid": 2, "birth": 1, "age": 1, "ht": 2.0},
        ...         {"famid": 2, "birth": 1, "age": 2, "ht": 3.2},
        ...         {"famid": 2, "birth": 2, "age": 1, "ht": 1.8},
        ...         {"famid": 2, "birth": 2, "age": 2, "ht": 2.8},
        ...         {"famid": 2, "birth": 3, "age": 1, "ht": 1.9},
        ...         {"famid": 2, "birth": 3, "age": 2, "ht": 2.4},
        ...         {"famid": 3, "birth": 1, "age": 1, "ht": 2.2},
        ...         {"famid": 3, "birth": 1, "age": 2, "ht": 3.3},
        ...         {"famid": 3, "birth": 2, "age": 1, "ht": 2.3},
        ...         {"famid": 3, "birth": 2, "age": 2, "ht": 3.4},
        ...         {"famid": 3, "birth": 3, "age": 1, "ht": 2.1},
        ...         {"famid": 3, "birth": 3, "age": 2, "ht": 2.9},
        ...     ]
        ... )
        >>> df
            famid  birth  age   ht
        0       1      1    1  2.8
        1       1      1    2  3.4
        2       1      2    1  2.9
        3       1      2    2  3.8
        4       1      3    1  2.2
        5       1      3    2  2.9
        6       2      1    1  2.0
        7       2      1    2  3.2
        8       2      2    1  1.8
        9       2      2    2  2.8
        10      2      3    1  1.9
        11      2      3    2  2.4
        12      3      1    1  2.2
        13      3      1    2  3.3
        14      3      2    1  2.3
        15      3      2    2  3.4
        16      3      3    1  2.1
        17      3      3    2  2.9
        >>> spec = {".name": ["ht1", "ht2"],
        ...         ".value": ["ht", "ht"],
        ...         "age": [1, 2]}
        >>> spec = pd.DataFrame(spec)
        >>> spec
          .name .value  age
        0   ht1     ht    1
        1   ht2     ht    2
        >>> pivot_wider_spec(df=df, spec=spec, index=['famid', 'birth'])
           famid  birth  ht1  ht2
        0      1      1  2.8  3.4
        1      1      2  2.9  3.8
        2      1      3  2.2  2.9
        3      2      1  2.0  3.2
        4      2      2  1.8  2.8
        5      2      3  1.9  2.4
        6      3      1  2.2  3.3
        7      3      2  2.3  3.4
        8      3      3  2.1  2.9

    Args:
        df: A pandas DataFrame.
        spec: A specification DataFrame.
            At a minimum, the spec DataFrame
            must have a '.name' and a '.value' column,
            in that order, as its first two columns.
            The '.name' column should contain
            the names of the columns in the output DataFrame.
            The '.value' column should contain the name of the column(s)
            in the source DataFrame that will serve as the values.
            Additional columns in spec will serve as the columns
            to be flipped to wide form; at least one is required.
            Note that these additional columns should already exist
            in the source DataFrame.
        index: Name(s) of columns to use as identifier variables.
            It should be either a single column name, or a list of column names.
            If `index` is not provided, the DataFrame's index is used.
            Column selection is possible using the
            [`select`][janitor.functions.select.select] syntax.
        reset_index: Determines whether to reset the `index`.
            Applicable only if `index` is provided.

    Raises:
        ValueError: If the spec's columns are not unique,
            if the first two columns of the spec are not
            '.name' followed by '.value',
            or if the spec has no columns besides '.name' and '.value'.
        KeyError: If the spec has no '.name' or no '.value' column.

    Returns:
        A pandas DataFrame that has been pivoted from long to wide form.
    """  # noqa: E501
    check("spec", spec, [pd.DataFrame])
    check("reset_index", reset_index, [bool])
    if not spec.columns.is_unique:
        raise ValueError("Kindly ensure the spec's columns is unique.")
    if ".name" not in spec.columns:
        raise KeyError(
            "Kindly ensure the spec DataFrame has a `.name` column."
        )
    if ".value" not in spec.columns:
        raise KeyError(
            "Kindly ensure the spec DataFrame has a `.value` column."
        )
    if spec.columns.tolist()[:2] != [".name", ".value"]:
        raise ValueError(
            "The first two columns of the spec DataFrame "
            "should be '.name' and '.value', "
            "with '.name' coming before '.value'."
        )
    if spec.columns.size == 2:
        raise ValueError(
            "Kindly provide the column(s) "
            "to use to make new frame’s columns"
        )
    # Everything after '.name' and '.value' names the source columns
    # whose values get spread across the output's column labels.
    columns = spec.columns[2:]
    values = spec[".value"].unique()
    pivot_kwargs = {"columns": columns, "values": values}
    if index is not None:
        index = _select_index([index], df, axis="columns")
        index = df.columns[index].tolist()
        # Forward `index` only when the caller supplied one, so that pandas
        # falls back to the frame's existing index otherwise.
        # NOTE(review): newer pandas releases appear to use a sentinel default
        # for `index`, where an explicit None would be read as a label.
        pivot_kwargs["index"] = index
    df = df.pivot(**pivot_kwargs)
    # Series mapping each ('.value', *pivoted columns) key to its '.name'.
    # Squeeze along the columns axis only: a bare `squeeze()` would collapse
    # a single-row spec all the way down to a scalar.
    lookup = spec.set_index(spec.columns[1:].tolist()).squeeze(axis="columns")
    # Keep only the combinations listed in the spec, in the spec's order,
    # then swap the composite labels for the requested output names.
    df = df.reindex(columns=lookup.index)
    df.columns = df.columns.map(lookup)
    if reset_index and index:
        return df.reset_index()
    return df
0 commit comments