Skip to content

Commit 49df226

Browse files
mroeschke authored and galipremsagar committed
Use .plc_column instead of .to_pylibcudf in rolling, string utilities (#20562)
Follow up to #20306

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: #20562
1 parent 67da85b commit 49df226

File tree

7 files changed

+11
-14
lines changed

7 files changed

+11
-14
lines changed

python/cudf/cudf/core/byte_pair_encoding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class BytePairEncoder:
2525

2626
def __init__(self, merges_pair: Series) -> None:
2727
self.merge_pairs = plc.nvtext.byte_pair_encode.BPEMergePairs(
28-
merges_pair._column.to_pylibcudf(mode="read")
28+
merges_pair._column.plc_column
2929
)
3030

3131
def __call__(self, text: Series, separator: str = " ") -> Series:

python/cudf/cudf/core/character_normalizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(
3131
if special_tokens is None:
3232
special_tokens = Series([], dtype="object")
3333
self.normalizer = plc.nvtext.normalize.CharacterNormalizer(
34-
do_lower, special_tokens._column.to_pylibcudf(mode="read")
34+
do_lower, special_tokens._column.plc_column
3535
)
3636

3737
def normalize(self, text: Series) -> Series:

python/cudf/cudf/core/tokenize_vocabulary.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class TokenizeVocabulary:
2020

2121
def __init__(self, vocabulary: Series) -> None:
2222
self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary(
23-
vocabulary._column.to_pylibcudf(mode="read")
23+
vocabulary._column.plc_column
2424
)
2525

2626
def tokenize(

python/cudf/cudf/core/tools/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -664,7 +664,7 @@ def _datetime_binop(
664664
with acquire_spill_lock():
665665
datetime_col = type(datetime_col).from_pylibcudf(
666666
plc.datetime.add_calendrical_months(
667-
datetime_col.to_pylibcudf(mode="read"),
667+
datetime_col.plc_column,
668668
pa_scalar_to_plc_scalar(pa.scalar(value)),
669669
)
670670
)

python/cudf/cudf/core/udf/utils.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,7 @@ def _get_input_args_from_frame(fr: IndexedFrame) -> list:
242242
offsets = []
243243
for col in _supported_cols_from_frame(fr).values():
244244
if col.dtype == CUDF_STRING_DTYPE:
245-
data = column_to_string_view_array_init_heap(
246-
col.to_pylibcudf(mode="read")
247-
)
245+
data = column_to_string_view_array_init_heap(col.plc_column)
248246
else:
249247
data = col.data
250248
if col.mask is not None:

python/cudf/cudf/core/window/rolling.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -304,15 +304,14 @@ def _plc_windows(self) -> WindowTypePair:
304304
orderby_obj = as_column(range(len(self.obj)))
305305
if self._group_keys is not None:
306306
group_cols: list[plc.Column] = [
307-
col.to_pylibcudf(mode="read")
308-
for col in self._group_keys._columns
307+
col.plc_column for col in self._group_keys._columns
309308
]
310309
else:
311310
group_cols = []
312311
group_keys = plc.Table(group_cols)
313312
return plc.rolling.make_range_windows(
314313
group_keys,
315-
orderby_obj.to_pylibcudf(mode="read"),
314+
orderby_obj.plc_column,
316315
plc.types.Order.ASCENDING,
317316
plc.types.NullOrder.BEFORE,
318317
plc.rolling.BoundedOpen(plc.Scalar.from_py(pre)),
@@ -337,8 +336,8 @@ def _plc_windows(self) -> WindowTypePair:
337336
SIZE_TYPE_DTYPE
338337
)
339338
return (
340-
preceding_window.to_pylibcudf(mode="read"),
341-
following_window.to_pylibcudf(mode="read"),
339+
preceding_window.plc_column,
340+
following_window.plc_column,
342341
)
343342
else:
344343
raise ValueError(
@@ -359,7 +358,7 @@ def _apply_agg_column(
359358
with acquire_spill_lock():
360359
return ColumnBase.from_pylibcudf(
361360
plc.rolling.rolling_window(
362-
source_column.to_pylibcudf(mode="read"),
361+
source_column.plc_column,
363362
pre,
364363
fwd,
365364
self.min_periods or 1,

python/cudf/cudf/core/wordpiece_tokenize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class WordPieceVocabulary:
2020

2121
def __init__(self, vocabulary: Series) -> None:
2222
self.vocabulary = plc.nvtext.wordpiece_tokenize.WordPieceVocabulary(
23-
vocabulary._column.to_pylibcudf(mode="read")
23+
vocabulary._column.plc_column
2424
)
2525

2626
def tokenize(self, text: Series, max_words_per_row: int = 0) -> Series:

0 commit comments

Comments
 (0)