Commit 30ce3b3

Merge branch 'main' into fix-59242
2 parents 8813f77 + 70edaa0 commit 30ce3b3

File tree: 25 files changed, +526 -133 lines

25 files changed

+526
-133
lines changed

doc/source/user_guide/cookbook.rst
Lines changed: 1 addition & 1 deletion

@@ -874,7 +874,7 @@ Timeseries
 <https://stackoverflow.com/questions/13893227/vectorized-look-up-of-values-in-pandas-dataframe>`__

 `Aggregation and plotting time series
-<https://nipunbatra.github.io/blog/visualisation/2013/05/01/aggregation-timeseries.html>`__
+<https://nipunbatra.github.io/blog/posts/2013-05-01-aggregation-timeseries.html>`__

 Turn a matrix with hours in columns and days in rows into a continuous row sequence in the form of a time series.
 `How to rearrange a Python pandas DataFrame?

doc/source/whatsnew/v3.0.0.rst
Lines changed: 3 additions & 0 deletions

@@ -35,6 +35,7 @@ Other enhancements
 - :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
+- :func:`pandas.merge`, :meth:`DataFrame.merge` and :meth:`DataFrame.join` now support anti joins (``left_anti`` and ``right_anti``) in the ``how`` parameter (:issue:`42916`)
 - :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`)
 - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).

@@ -68,6 +69,7 @@ Other enhancements
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
+- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)
 - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)

@@ -631,6 +633,7 @@ Datetimelike
 - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
 - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
 - Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
+- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`)
 - Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`)
 - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`)
 - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)

pandas/_libs/interval.pyx
Lines changed: 6 additions & 0 deletions

@@ -209,6 +209,12 @@ cdef class IntervalMixin:
         """
         Indicates if an interval is empty, meaning it contains no points.

+        An interval is considered empty if its `left` and `right` endpoints
+        are equal, and it is not closed on both sides. This means that the
+        interval does not include any real points. In the case of an
+        :class:`pandas.arrays.IntervalArray` or :class:`IntervalIndex`, the
+        property returns a boolean array indicating the emptiness of each interval.
+
         Returns
         -------
         bool or ndarray
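
A minimal sketch of the behavior the added docstring text describes; the commented values follow that description and are not output captured from this commit.

    import pandas as pd

    # A zero-length interval that is not closed on both sides contains no points.
    pd.Interval(0, 0, closed="left").is_empty    # True
    # Closing both endpoints keeps the single point 0, so the interval is not empty.
    pd.Interval(0, 0, closed="both").is_empty    # False
    # On an IntervalIndex the property returns one boolean flag per interval.
    pd.IntervalIndex([pd.Interval(0, 0, closed="right"), pd.Interval(0, 1)]).is_empty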

pandas/_libs/tslibs/period.pyx
Lines changed: 6 additions & 0 deletions

@@ -2140,6 +2140,12 @@ cdef class _Period(PeriodMixin):
         """
         Get day of the month that a Period falls on.

+        The `day` property provides a simple way to access the day component
+        of a `Period` object, which represents time spans in various frequencies
+        (e.g., daily, hourly, monthly). If the period's frequency does not include
+        a day component (e.g., yearly or quarterly periods), the returned day
+        corresponds to the first day of that period.
+
         Returns
        -------
         int
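
A small sketch of the accessor the added paragraph describes, using ordinary Period construction (illustrative values, not taken from this commit).

    import pandas as pd

    # Daily and sub-daily periods expose their day-of-month component directly.
    pd.Period("2025-03-14", freq="D").day          # 14
    pd.Period("2025-03-14 10:00", freq="h").day    # 14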

pandas/_typing.py
Lines changed: 3 additions & 1 deletion

@@ -442,7 +442,9 @@ def closed(self) -> bool:
 AnyAll = Literal["any", "all"]

 # merge
-MergeHow = Literal["left", "right", "inner", "outer", "cross"]
+MergeHow = Literal[
+    "left", "right", "inner", "outer", "cross", "left_anti", "right_anti"
+]
 MergeValidate = Literal[
     "one_to_one",
     "1:1",

pandas/core/computation/parsing.py
Lines changed: 0 additions & 44 deletions

@@ -123,16 +123,6 @@ def clean_column_name(name: Hashable) -> Hashable:
     -------
     name : hashable
         Returns the name after tokenizing and cleaning.
-
-    Notes
-    -----
-    For some cases, a name cannot be converted to a valid Python identifier.
-    In that case :func:`tokenize_string` raises a SyntaxError.
-    In that case, we just return the name unmodified.
-
-    If this name was used in the query string (this makes the query call impossible)
-    an error will be raised by :func:`tokenize_backtick_quoted_string` instead,
-    which is not caught and propagates to the user level.
     """
     try:
         # Escape backticks

@@ -145,40 +135,6 @@ def clean_column_name(name: Hashable) -> Hashable:
         return name


-def tokenize_backtick_quoted_string(
-    token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
-) -> tuple[int, str]:
-    """
-    Creates a token from a backtick quoted string.
-
-    Moves the token_generator forwards till right after the next backtick.
-
-    Parameters
-    ----------
-    token_generator : Iterator[tokenize.TokenInfo]
-        The generator that yields the tokens of the source string (Tuple[int, str]).
-        The generator is at the first token after the backtick (`)
-
-    source : str
-        The Python source code string.
-
-    string_start : int
-        This is the start of backtick quoted string inside the source string.
-
-    Returns
-    -------
-    tok: Tuple[int, str]
-        The token that represents the backtick quoted string.
-        The integer is equal to BACKTICK_QUOTED_STRING (100).
-    """
-    for _, tokval, start, _, _ in token_generator:
-        if tokval == "`":
-            string_end = start[1]
-            break
-
-    return BACKTICK_QUOTED_STRING, source[string_start:string_end]
-
-
 class ParseState(Enum):
     DEFAULT = 0
     IN_BACKTICK = 1

pandas/core/frame.py
Lines changed: 16 additions & 2 deletions

@@ -315,7 +315,8 @@
 ----------%s
 right : DataFrame or named Series
     Object to merge with.
-how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner'
+how : {'left', 'right', 'outer', 'inner', 'cross', 'left_anti', 'right_anti'},
+    default 'inner'
     Type of merge to be performed.

     * left: use only keys from left frame, similar to a SQL left outer join;

@@ -328,6 +329,10 @@
       join; preserve the order of the left keys.
     * cross: creates the cartesian product from both frames, preserves the order
       of the left keys.
+    * left_anti: use only keys from left frame that are not in right frame, similar
+      to SQL left anti join; preserve key order.
+    * right_anti: use only keys from right frame that are not in left frame, similar
+      to SQL right anti join; preserve key order.
 on : label or list
     Column or index level names to join on. These must be found in both
     DataFrames. If `on` is None and not merging on indexes then this defaults

@@ -4793,6 +4798,10 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
         """
         Return a subset of the DataFrame's columns based on the column dtypes.

+        This method allows for filtering columns based on their data types.
+        It is useful when working with heterogeneous DataFrames where operations
+        need to be performed on a specific subset of data types.
+
         Parameters
         ----------
         include, exclude : scalar or list-like
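
A minimal sketch of the column filtering the added select_dtypes paragraph describes; the frame and dtypes below are illustrative assumptions.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5], "c": ["x", "y"]})

    # Keep only the numeric columns ("a" and "b").
    df.select_dtypes(include="number")
    # Keep only the float column ("b").
    df.select_dtypes(include=["float"])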

@@ -10609,7 +10618,8 @@ def join(
         values given, the `other` DataFrame must have a MultiIndex. Can
         pass an array as the join key if it is not already contained in
         the calling DataFrame. Like an Excel VLOOKUP operation.
-    how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'left'
+    how : {'left', 'right', 'outer', 'inner', 'cross', 'left_anti', 'right_anti'},
+        default 'left'
         How to handle the operation of the two objects.

         * left: use calling frame's index (or column if on is specified)

@@ -10621,6 +10631,10 @@ def join(
           of the calling's one.
         * cross: creates the cartesian product from both frames, preserves the order
           of the left keys.
+        * left_anti: use set difference of calling frame's index and `other`'s
+          index.
+        * right_anti: use set difference of `other`'s index and calling frame's
+          index.
     lsuffix : str, default ''
         Suffix to use from left frame's overlapping columns.
     rsuffix : str, default ''
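
A minimal sketch of the index-based anti join the new ``left_anti`` bullet describes for DataFrame.join, assuming the feature behaves as documented above (illustrative frames, not output from this commit).

    import pandas as pd

    left = pd.DataFrame({"x": [1, 2, 3]}, index=["a", "b", "c"])
    right = pd.DataFrame({"y": [10, 20]}, index=["a", "b"])

    # Keep only the rows of `left` whose index labels do not appear in `right`;
    # here only the row labelled "c" survives, and the unmatched right-hand
    # column "y" is expected to come back as missing values.
    left.join(right, how="left_anti")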

pandas/core/nanops.py
Lines changed: 9 additions & 2 deletions

@@ -1093,11 +1093,14 @@ def reduction(
         if values.size == 0:
             return _na_for_min_count(values, axis)

+        dtype = values.dtype
         values, mask = _get_values(
             values, skipna, fill_value_typ=fill_value_typ, mask=mask
         )
         result = getattr(values, meth)(axis)
-        result = _maybe_null_out(result, axis, mask, values.shape)
+        result = _maybe_null_out(
+            result, axis, mask, values.shape, datetimelike=dtype.kind in "mM"
+        )
         return result

     return reduction

@@ -1499,6 +1502,7 @@ def _maybe_null_out(
     mask: npt.NDArray[np.bool_] | None,
     shape: tuple[int, ...],
     min_count: int = 1,
+    datetimelike: bool = False,
 ) -> np.ndarray | float | NaTType:
     """
     Returns

@@ -1520,7 +1524,10 @@ def _maybe_null_out(
         null_mask = np.broadcast_to(below_count, new_shape)

     if np.any(null_mask):
-        if is_numeric_dtype(result):
+        if datetimelike:
+            # GH#60646 For datetimelike, no need to cast to float
+            result[null_mask] = iNaT
+        elif is_numeric_dtype(result):
             if np.iscomplexobj(result):
                 result = result.astype("c16")
             elif not is_float_dtype(result):
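
A small sketch of the behavior this change targets, following the whatsnew entry for GH 60850; the data are illustrative and the commented reprs are expectations, not captured output.

    import pandas as pd

    df = pd.DataFrame(
        {"ts": pd.to_datetime(["2024-01-01 00:00:00.000000001", None])}
    )

    # Reductions over datetime64/timedelta64 columns are expected to keep their
    # dtype: positions that must be nulled out are written as iNaT (shown as NaT)
    # instead of routing the result through float64, which could drop nanoseconds.
    df.min(skipna=False)   # ts   NaT
    df.min()               # ts   2024-01-01 00:00:00.000000001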

pandas/core/reshape/merge.py
Lines changed: 86 additions & 11 deletions

@@ -180,7 +180,8 @@ def merge(
     First pandas object to merge.
 right : DataFrame or named Series
     Second pandas object to merge.
-how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner'
+how : {'left', 'right', 'outer', 'inner', 'cross', 'left_anti', 'right_anti},
+    default 'inner'
     Type of merge to be performed.

     * left: use only keys from left frame, similar to a SQL left outer join;

@@ -193,6 +194,10 @@ def merge(
       join; preserve the order of the left keys.
     * cross: creates the cartesian product from both frames, preserves the order
       of the left keys.
+    * left_anti: use only keys from left frame that are not in right frame, similar
+      to SQL left anti join; preserve key order.
+    * right_anti: use only keys from right frame that are not in left frame, similar
+      to SQL right anti join; preserve key order.
 on : label or list
     Column or index level names to join on. These must be found in both
     DataFrames. If `on` is None and not merging on indexes then this defaults

@@ -953,7 +958,7 @@ def __init__(
         self,
         left: DataFrame | Series,
         right: DataFrame | Series,
-        how: JoinHow | Literal["asof"] = "inner",
+        how: JoinHow | Literal["left_anti", "right_anti", "asof"] = "inner",
         on: IndexLabel | AnyArrayLike | None = None,
         left_on: IndexLabel | AnyArrayLike | None = None,
         right_on: IndexLabel | AnyArrayLike | None = None,

@@ -968,7 +973,7 @@ def __init__(
         _right = _validate_operand(right)
         self.left = self.orig_left = _left
         self.right = self.orig_right = _right
-        self.how = how
+        self.how, self.anti_join = self._validate_how(how)

         self.on = com.maybe_make_list(on)

@@ -998,14 +1003,6 @@ def __init__(
             )
             raise MergeError(msg)

-        # GH 59435: raise when "how" is not a valid Merge type
-        merge_type = {"left", "right", "inner", "outer", "cross", "asof"}
-        if how not in merge_type:
-            raise ValueError(
-                f"'{how}' is not a valid Merge type: "
-                f"left, right, inner, outer, cross, asof"
-            )
-
         self.left_on, self.right_on = self._validate_left_right_on(left_on, right_on)

         (

@@ -1035,6 +1032,37 @@ def __init__(
         if validate is not None:
             self._validate_validate_kwd(validate)

+    @final
+    def _validate_how(
+        self, how: JoinHow | Literal["left_anti", "right_anti", "asof"]
+    ) -> tuple[JoinHow | Literal["asof"], bool]:
+        """
+        Validate the 'how' parameter and return the actual join type and whether
+        this is an anti join.
+        """
+        # GH 59435: raise when "how" is not a valid Merge type
+        merge_type = {
+            "left",
+            "right",
+            "inner",
+            "outer",
+            "left_anti",
+            "right_anti",
+            "cross",
+            "asof",
+        }
+        if how not in merge_type:
+            raise ValueError(
+                f"'{how}' is not a valid Merge type: "
+                f"left, right, inner, outer, left_anti, right_anti, cross, asof"
+            )
+        anti_join = False
+        if how in {"left_anti", "right_anti"}:
+            how = how.split("_")[0]  # type: ignore[assignment]
+            anti_join = True
+        how = cast(JoinHow | Literal["asof"], how)
+        return how, anti_join
+
     def _maybe_require_matching_dtypes(
         self, left_join_keys: list[ArrayLike], right_join_keys: list[ArrayLike]
     ) -> None:

@@ -1405,6 +1433,11 @@ def _get_join_info(
         n = len(left_ax) if left_indexer is None else len(left_indexer)
         join_index = default_index(n)

+        if self.anti_join:
+            join_index, left_indexer, right_indexer = self._handle_anti_join(
+                join_index, left_indexer, right_indexer
+            )
+
         return join_index, left_indexer, right_indexer

     @final

@@ -1447,6 +1480,48 @@ def _create_join_index(
             return index.copy()
         return index.take(indexer)

+    @final
+    def _handle_anti_join(
+        self,
+        join_index: Index,
+        left_indexer: npt.NDArray[np.intp] | None,
+        right_indexer: npt.NDArray[np.intp] | None,
+    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
+        """
+        Handle anti join by returning the correct join index and indexers
+
+        Parameters
+        ----------
+        join_index : Index
+            join index
+        left_indexer : np.ndarray[np.intp] or None
+            left indexer
+        right_indexer : np.ndarray[np.intp] or None
+            right indexer
+
+        Returns
+        -------
+        Index, np.ndarray[np.intp] or None, np.ndarray[np.intp] or None
+        """
+        # Make sure indexers are not None
+        if left_indexer is None:
+            left_indexer = np.arange(len(self.left))
+        if right_indexer is None:
+            right_indexer = np.arange(len(self.right))
+
+        assert self.how in {"left", "right"}
+        if self.how == "left":
+            # Filter to rows where left keys are not in right keys
+            filt = right_indexer == -1
+        else:
+            # Filter to rows where right keys are not in left keys
+            filt = left_indexer == -1
+        join_index = join_index[filt]
+        left_indexer = left_indexer[filt]
+        right_indexer = right_indexer[filt]
+
+        return join_index, left_indexer, right_indexer
+
     @final
     def _get_merge_keys(
         self,
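
A minimal usage sketch of the anti-join modes wired up in this file; the frames are illustrative and the commented results follow the docstring rather than captured runs.

    import pandas as pd

    left = pd.DataFrame({"key": ["a", "b", "c"], "lval": [1, 2, 3]})
    right = pd.DataFrame({"key": ["b", "c", "d"], "rval": [20, 30, 40]})

    # Rows of `left` whose key has no match in `right`; only key "a" should survive,
    # with the right-hand column "rval" coming back as missing values.
    pd.merge(left, right, on="key", how="left_anti")

    # Rows of `right` whose key has no match in `left`; only key "d" should survive.
    pd.merge(left, right, on="key", how="right_anti")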

pandas/io/formats/printing.py
Lines changed: 2 additions & 0 deletions

@@ -111,6 +111,8 @@ def _pprint_seq(
     """
     if isinstance(seq, set):
         fmt = "{{{body}}}"
+    elif isinstance(seq, frozenset):
+        fmt = "frozenset({body})"
     else:
         fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
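
A small sketch of the formatting difference this branch introduces, assuming _pprint_seq is reached through an ordinary Series repr; the exact rendering is an expectation, not captured output.

    import pandas as pd

    s = pd.Series([frozenset({1, 2}), {3, 4}])
    print(s)
    # The frozenset element is expected to render in a "frozenset(1, 2)" style,
    # while the plain set keeps its braced "{3, 4}" formatting.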

pandas/io/orc.py
Lines changed: 7 additions & 0 deletions

@@ -45,6 +45,13 @@ def read_orc(
     """
     Load an ORC object from the file path, returning a DataFrame.

+    This method reads an ORC (Optimized Row Columnar) file into a pandas
+    DataFrame using the `pyarrow.orc` library. ORC is a columnar storage format
+    that provides efficient compression and fast retrieval for analytical workloads.
+    It allows reading specific columns, handling different filesystem
+    types (such as local storage, cloud storage via fsspec, or pyarrow filesystem),
+    and supports different data type backends, including `numpy_nullable` and `pyarrow`.
+
     Parameters
     ----------
     path : str, path object, or file-like object
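
A brief usage sketch matching the added description; the file path and column names are hypothetical, and pyarrow is assumed to be installed.

    import pandas as pd

    # Read only two columns from a local ORC file (requires pyarrow at runtime).
    df = pd.read_orc("data/events.orc", columns=["user_id", "ts"])

    # The pyarrow-backed dtypes mentioned above can be requested explicitly.
    df_arrow = pd.read_orc("data/events.orc", dtype_backend="pyarrow")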
