|
15 | 15 | from pandas.core.dtypes.concat import concat_compat
|
16 | 16 |
|
17 | 17 | from janitor.functions.utils import (
|
18 |
| - _select_column_names, |
| 18 | + _select_index, |
19 | 19 | _computations_expand_grid,
|
20 | 20 | )
|
21 | 21 | from janitor.utils import check
|
@@ -52,7 +52,7 @@ def pivot_longer(
|
52 | 52 | row axis.
|
53 | 53 |
|
54 | 54 | Column selection in `index` and `column_names` is possible using the
|
55 |
| - [`select_columns`][janitor.functions.select_columns.select_columns] syntax. |
| 55 | + [`select_columns`][janitor.functions.select.select_columns] syntax. |
56 | 56 |
|
57 | 57 | Example:
|
58 | 58 |
|
@@ -382,17 +382,35 @@ def _data_checks_pivot_longer(
|
382 | 382 | "when the columns are a MultiIndex."
|
383 | 383 | )
|
384 | 384 |
|
| 385 | + is_multi_index = isinstance(df.columns, pd.MultiIndex) |
| 386 | + indices = None |
385 | 387 | if column_names is not None:
|
386 |
| - if is_list_like(column_names): |
387 |
| - column_names = list(column_names) |
388 |
| - column_names = _select_column_names(column_names, df) |
389 |
| - column_names = list(column_names) |
| 388 | + if is_multi_index: |
| 389 | + column_names = _check_tuples_multiindex( |
| 390 | + df.columns, column_names, "column_names" |
| 391 | + ) |
| 392 | + else: |
| 393 | + if is_list_like(column_names): |
| 394 | + column_names = list(column_names) |
| 395 | + indices = _select_index(column_names, df, axis="columns") |
| 396 | + column_names = df.columns[indices] |
| 397 | + if not is_list_like(column_names): |
| 398 | + column_names = [column_names] |
| 399 | + else: |
| 400 | + column_names = list(column_names) |
390 | 401 |
|
391 | 402 | if index is not None:
|
392 |
| - if is_list_like(index): |
393 |
| - index = list(index) |
394 |
| - index = _select_column_names(index, df) |
395 |
| - index = list(index) |
| 403 | + if is_multi_index: |
| 404 | + index = _check_tuples_multiindex(df.columns, index, "index") |
| 405 | + else: |
| 406 | + if is_list_like(index): |
| 407 | + index = list(index) |
| 408 | + indices = _select_index(index, df, axis="columns") |
| 409 | + index = df.columns[indices] |
| 410 | + if not is_list_like(index): |
| 411 | + index = [index] |
| 412 | + else: |
| 413 | + index = list(index) |
396 | 414 |
|
397 | 415 | if index is None:
|
398 | 416 | if column_names is None:
|
@@ -1181,7 +1199,7 @@ def pivot_wider(
|
1181 | 1199 |
|
1182 | 1200 | Column selection in `index`, `names_from` and `values_from`
|
1183 | 1201 | is possible using the
|
1184 |
| - [`select_columns`][janitor.functions.select_columns.select_columns] syntax. |
| 1202 | + [`select_columns`][janitor.functions.select.select_columns] syntax. |
1185 | 1203 |
|
1186 | 1204 | A ValueError is raised if the combination
|
1187 | 1205 | of the `index` and `names_from` is not unique.
|
@@ -1455,27 +1473,69 @@ def _data_checks_pivot_wider(
|
1455 | 1473 | checking happens.
|
1456 | 1474 | """
|
1457 | 1475 |
|
| 1476 | + is_multi_index = isinstance(df.columns, pd.MultiIndex) |
| 1477 | + indices = None |
1458 | 1478 | if index is not None:
|
1459 |
| - if is_list_like(index): |
1460 |
| - index = list(index) |
1461 |
| - index = _select_column_names(index, df) |
1462 |
| - index = list(index) |
| 1479 | + if is_multi_index: |
| 1480 | + if not isinstance(index, list): |
| 1481 | + raise TypeError( |
| 1482 | + "For a MultiIndex column, pass a list of tuples " |
| 1483 | + "to the index argument." |
| 1484 | + ) |
| 1485 | + index = _check_tuples_multiindex(df.columns, index, "index") |
| 1486 | + else: |
| 1487 | + if is_list_like(index): |
| 1488 | + index = list(index) |
| 1489 | + indices = _select_index(index, df, axis="columns") |
| 1490 | + index = df.columns[indices] |
| 1491 | + if not is_list_like(index): |
| 1492 | + index = [index] |
| 1493 | + else: |
| 1494 | + index = list(index) |
1463 | 1495 |
|
1464 | 1496 | if names_from is None:
|
1465 | 1497 | raise ValueError(
|
1466 | 1498 | "pivot_wider() is missing 1 required argument: 'names_from'"
|
1467 | 1499 | )
|
1468 | 1500 |
|
1469 |
| - if is_list_like(names_from): |
1470 |
| - names_from = list(names_from) |
1471 |
| - names_from = _select_column_names(names_from, df) |
1472 |
| - names_from = list(names_from) |
| 1501 | + if is_multi_index: |
| 1502 | + if not isinstance(names_from, list): |
| 1503 | + raise TypeError( |
| 1504 | + "For a MultiIndex column, pass a list of tuples " |
| 1505 | + "to the names_from argument." |
| 1506 | + ) |
| 1507 | + names_from = _check_tuples_multiindex( |
| 1508 | + df.columns, names_from, "names_from" |
| 1509 | + ) |
| 1510 | + else: |
| 1511 | + if is_list_like(names_from): |
| 1512 | + names_from = list(names_from) |
| 1513 | + indices = _select_index(names_from, df, axis="columns") |
| 1514 | + names_from = df.columns[indices] |
| 1515 | + if not is_list_like(names_from): |
| 1516 | + names_from = [names_from] |
| 1517 | + else: |
| 1518 | + names_from = list(names_from) |
1473 | 1519 |
|
1474 | 1520 | if values_from is not None:
|
1475 |
| - if is_list_like(values_from): |
1476 |
| - values_from = list(values_from) |
1477 |
| - out = _select_column_names(values_from, df) |
1478 |
| - out = list(out) |
| 1521 | + if is_multi_index: |
| 1522 | + if not isinstance(values_from, list): |
| 1523 | + raise TypeError( |
| 1524 | + "For a MultiIndex column, pass a list of tuples " |
| 1525 | + "to the values_from argument." |
| 1526 | + ) |
| 1527 | + out = _check_tuples_multiindex( |
| 1528 | + df.columns, values_from, "values_from" |
| 1529 | + ) |
| 1530 | + else: |
| 1531 | + if is_list_like(values_from): |
| 1532 | + values_from = list(values_from) |
| 1533 | + indices = _select_index(values_from, df, axis="columns") |
| 1534 | + out = df.columns[indices] |
| 1535 | + if not is_list_like(out): |
| 1536 | + out = [out] |
| 1537 | + else: |
| 1538 | + out = list(out) |
1479 | 1539 | # hack to align with pd.pivot
|
1480 | 1540 | if values_from == out[0]:
|
1481 | 1541 | values_from = out[0]
|
@@ -1550,3 +1610,27 @@ def _expand(indexer, retain_categories):
|
1550 | 1610 | ordered=indexer.ordered,
|
1551 | 1611 | )
|
1552 | 1612 | return indexer
|
| 1613 | + |
| 1614 | + |
| 1615 | +def _check_tuples_multiindex(indexer, args, param): |
| 1616 | + """ |
| 1617 | + Check entries for tuples, |
| 1618 | + if indexer is a MultiIndex. |
| 1619 | +
|
| 1620 | + Returns a list of tuples. |
| 1621 | + """ |
| 1622 | + all_tuples = (isinstance(arg, tuple) for arg in args) |
| 1623 | + if not all(all_tuples): |
| 1624 | + raise TypeError( |
| 1625 | + f"{param} must be a list of tuples " |
| 1626 | + "when the columns are a MultiIndex." |
| 1627 | + ) |
| 1628 | + |
| 1629 | + not_found = set(args).difference(indexer) |
| 1630 | + if any(not_found): |
| 1631 | + raise KeyError( |
| 1632 | + f"Tuples {*not_found,} in the {param} " |
| 1633 | + "argument do not exist in the dataframe's columns." |
| 1634 | + ) |
| 1635 | + |
| 1636 | + return args |
0 commit comments