Use .plc_column instead of .to_pylibcudf in rolling, string utilities (#20562)

mroeschke · galipremsagar · commit 49df22652e93 · 2025-11-18T17:40:03.000Z
Follow-up to #20306. Authors: Matthew Roeschke (https://github.com/mroeschke). Approvers: Vyas Ramasubramani (https://github.com/vyasr). URL: #20562
diff --git a/python/cudf/cudf/core/byte_pair_encoding.py b/python/cudf/cudf/core/byte_pair_encoding.py
@@ -25,7 +25,7 @@ class BytePairEncoder:
 
     def __init__(self, merges_pair: Series) -> None:
         self.merge_pairs = plc.nvtext.byte_pair_encode.BPEMergePairs(
-            merges_pair._column.to_pylibcudf(mode="read")
+            merges_pair._column.plc_column
         )
 
     def __call__(self, text: Series, separator: str = " ") -> Series:
diff --git a/python/cudf/cudf/core/character_normalizer.py b/python/cudf/cudf/core/character_normalizer.py
@@ -31,7 +31,7 @@ def __init__(
         if special_tokens is None:
             special_tokens = Series([], dtype="object")
         self.normalizer = plc.nvtext.normalize.CharacterNormalizer(
-            do_lower, special_tokens._column.to_pylibcudf(mode="read")
+            do_lower, special_tokens._column.plc_column
         )
 
     def normalize(self, text: Series) -> Series:
diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py
@@ -20,7 +20,7 @@ class TokenizeVocabulary:
 
     def __init__(self, vocabulary: Series) -> None:
         self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary(
-            vocabulary._column.to_pylibcudf(mode="read")
+            vocabulary._column.plc_column
         )
 
     def tokenize(
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
@@ -664,7 +664,7 @@ def _datetime_binop(
                     with acquire_spill_lock():
                         datetime_col = type(datetime_col).from_pylibcudf(
                             plc.datetime.add_calendrical_months(
-                                datetime_col.to_pylibcudf(mode="read"),
+                                datetime_col.plc_column,
                                 pa_scalar_to_plc_scalar(pa.scalar(value)),
                             )
                         )
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
@@ -242,9 +242,7 @@ def _get_input_args_from_frame(fr: IndexedFrame) -> list:
     offsets = []
     for col in _supported_cols_from_frame(fr).values():
         if col.dtype == CUDF_STRING_DTYPE:
-            data = column_to_string_view_array_init_heap(
-                col.to_pylibcudf(mode="read")
-            )
+            data = column_to_string_view_array_init_heap(col.plc_column)
         else:
             data = col.data
         if col.mask is not None:
diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
@@ -304,15 +304,14 @@ def _plc_windows(self) -> WindowTypePair:
                 orderby_obj = as_column(range(len(self.obj)))
             if self._group_keys is not None:
                 group_cols: list[plc.Column] = [
-                    col.to_pylibcudf(mode="read")
-                    for col in self._group_keys._columns
+                    col.plc_column for col in self._group_keys._columns
                 ]
             else:
                 group_cols = []
             group_keys = plc.Table(group_cols)
             return plc.rolling.make_range_windows(
                 group_keys,
-                orderby_obj.to_pylibcudf(mode="read"),
+                orderby_obj.plc_column,
                 plc.types.Order.ASCENDING,
                 plc.types.NullOrder.BEFORE,
                 plc.rolling.BoundedOpen(plc.Scalar.from_py(pre)),
@@ -337,8 +336,8 @@ def _plc_windows(self) -> WindowTypePair:
                 SIZE_TYPE_DTYPE
             )
             return (
-                preceding_window.to_pylibcudf(mode="read"),
-                following_window.to_pylibcudf(mode="read"),
+                preceding_window.plc_column,
+                following_window.plc_column,
             )
         else:
             raise ValueError(
@@ -359,7 +358,7 @@ def _apply_agg_column(
         with acquire_spill_lock():
             return ColumnBase.from_pylibcudf(
                 plc.rolling.rolling_window(
-                    source_column.to_pylibcudf(mode="read"),
+                    source_column.plc_column,
                     pre,
                     fwd,
                     self.min_periods or 1,
diff --git a/python/cudf/cudf/core/wordpiece_tokenize.py b/python/cudf/cudf/core/wordpiece_tokenize.py
@@ -20,7 +20,7 @@ class WordPieceVocabulary:
 
     def __init__(self, vocabulary: Series) -> None:
         self.vocabulary = plc.nvtext.wordpiece_tokenize.WordPieceVocabulary(
-            vocabulary._column.to_pylibcudf(mode="read")
+            vocabulary._column.plc_column
         )
 
     def tokenize(self, text: Series, max_words_per_row: int = 0) -> Series:

Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ class BytePairEncoder:`
`25`	`25`
`26`	`26`	`def __init__(self, merges_pair: Series) -> None:`
`27`	`27`	`self.merge_pairs = plc.nvtext.byte_pair_encode.BPEMergePairs(`
`28`		`- merges_pair._column.to_pylibcudf(mode="read")`
	`28`	`+ merges_pair._column.plc_column`
`29`	`29`	`)`
`30`	`30`
`31`	`31`	`def __call__(self, text: Series, separator: str = " ") -> Series:`
Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@ def __init__(`
`31`	`31`	`if special_tokens is None:`
`32`	`32`	`special_tokens = Series([], dtype="object")`
`33`	`33`	`self.normalizer = plc.nvtext.normalize.CharacterNormalizer(`
`34`		`- do_lower, special_tokens._column.to_pylibcudf(mode="read")`
	`34`	`+ do_lower, special_tokens._column.plc_column`
`35`	`35`	`)`
`36`	`36`
`37`	`37`	`def normalize(self, text: Series) -> Series:`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ class TokenizeVocabulary:`
`20`	`20`
`21`	`21`	`def __init__(self, vocabulary: Series) -> None:`
`22`	`22`	`self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary(`
`23`		`- vocabulary._column.to_pylibcudf(mode="read")`
	`23`	`+ vocabulary._column.plc_column`
`24`	`24`	`)`
`25`	`25`
`26`	`26`	`def tokenize(`
Original file line number	Diff line number	Diff line change
`@@ -664,7 +664,7 @@ def _datetime_binop(`
`664`	`664`	`with acquire_spill_lock():`
`665`	`665`	`datetime_col = type(datetime_col).from_pylibcudf(`
`666`	`666`	`plc.datetime.add_calendrical_months(`
`667`		`- datetime_col.to_pylibcudf(mode="read"),`
	`667`	`+ datetime_col.plc_column,`
`668`	`668`	`pa_scalar_to_plc_scalar(pa.scalar(value)),`
`669`	`669`	`)`
`670`	`670`	`)`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ class WordPieceVocabulary:`
`20`	`20`
`21`	`21`	`def __init__(self, vocabulary: Series) -> None:`
`22`	`22`	`self.vocabulary = plc.nvtext.wordpiece_tokenize.WordPieceVocabulary(`
`23`		`- vocabulary._column.to_pylibcudf(mode="read")`
	`23`	`+ vocabulary._column.plc_column`
`24`	`24`	`)`
`25`	`25`
`26`	`26`	`def tokenize(self, text: Series, max_words_per_row: int = 0) -> Series:`