Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use PyWeakref_GetRef and critical section in BlockValuesRefs #60540

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ else
endif

cy = meson.get_compiler('cython')
cdata = configuration_data()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move the configuration_data next to the configure_file call? I assume here you did it to avoid repeating the cy.version() check, but I think it reads easier to keep this all together

if cy.version().version_compare('>=3.1.0')
add_project_arguments('-Xfreethreading_compatible=true', language: 'cython')
cdata.set('freethreading_compatible', '1')
else
cdata.set('freethreading_compatible', '0')
endif

# Needed by pandas.test() when it looks for the pytest ini options
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/free_threading_config.pxi.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Autogenerated file containing Cython compile-time defines

DEF CYTHON_COMPATIBLE_WITH_FREE_THREADING = @freethreading_compatible@
68 changes: 54 additions & 14 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ cimport cython
from cpython.object cimport PyObject
from cpython.pyport cimport PY_SSIZE_T_MAX
from cpython.slice cimport PySlice_GetIndicesEx
from cpython.weakref cimport (
PyWeakref_GetObject,
PyWeakref_NewRef,
)
from cpython.weakref cimport PyWeakref_NewRef
from cython cimport Py_ssize_t

import numpy as np
Expand All @@ -29,6 +26,14 @@ from pandas._libs.util cimport (
is_integer_object,
)

include "free_threading_config.pxi"

IF CYTHON_COMPATIBLE_WITH_FREE_THREADING:
from cpython.ref cimport Py_DECREF
from cpython.weakref cimport PyWeakref_GetRef
ELSE:
from cpython.weakref cimport PyWeakref_GetObject


cdef extern from "Python.h":
PyObject* Py_None
Expand Down Expand Up @@ -908,17 +913,35 @@ cdef class BlockValuesRefs:
# if force=False. Clearing for every insertion causes slowdowns if
# all these objects stay alive, e.g. df.items() for wide DataFrames
# see GH#55245 and GH#55008
IF CYTHON_COMPATIBLE_WITH_FREE_THREADING:
cdef PyObject* pobj
cdef bint status

if force or len(self.referenced_blocks) > self.clear_counter:
self.referenced_blocks = [
ref for ref in self.referenced_blocks
if PyWeakref_GetObject(ref) != Py_None
]
IF CYTHON_COMPATIBLE_WITH_FREE_THREADING:
new_referenced_blocks = []
for ref in self.referenced_blocks:
status = PyWeakref_GetRef(ref, &pobj)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
status = PyWeakref_GetRef(ref, &pobj)
cdef PyObject *status = PyWeakref_GetRef(ref, &pobj)

Can you just declare / define all in one go? I don't think we need separate blocks for those (same comment for the status variable)

if status == 1:
Copy link
Member

@WillAyd WillAyd Jan 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So if the reference is dead we are intentionally doing nothing here right? We don't need to handle that at all?

(I'm not very familiar with the referencing here)

new_referenced_blocks.append(ref)
Py_DECREF(<object>pobj)
self.referenced_blocks = new_referenced_blocks
ELSE:
self.referenced_blocks = [
ref for ref in self.referenced_blocks
if PyWeakref_GetObject(ref) != Py_None
]

nr_of_refs = len(self.referenced_blocks)
if nr_of_refs < self.clear_counter // 2:
self.clear_counter = max(self.clear_counter // 2, 500)
elif nr_of_refs > self.clear_counter:
self.clear_counter = max(self.clear_counter * 2, nr_of_refs)

cpdef _add_reference_maybe_locked(self, Block blk):
self._clear_dead_references()
self.referenced_blocks.append(PyWeakref_NewRef(blk, None))

cpdef add_reference(self, Block blk):
"""Adds a new reference to our reference collection.

Expand All @@ -927,8 +950,15 @@ cdef class BlockValuesRefs:
blk : Block
The block that the new references should point to.
"""
IF CYTHON_COMPATIBLE_WITH_FREE_THREADING:
with cython.critical_section(self):
self._add_reference_maybe_locked(blk)
ELSE:
self._add_reference_maybe_locked(blk)

def _add_index_reference_maybe_locked(self, index: object) -> None:
self._clear_dead_references()
self.referenced_blocks.append(PyWeakref_NewRef(blk, None))
self.referenced_blocks.append(PyWeakref_NewRef(index, None))

def add_index_reference(self, index: object) -> None:
"""Adds a new reference to our reference collection when creating an index.
Expand All @@ -938,8 +968,16 @@ cdef class BlockValuesRefs:
index : Index
The index that the new reference should point to.
"""
self._clear_dead_references()
self.referenced_blocks.append(PyWeakref_NewRef(index, None))
IF CYTHON_COMPATIBLE_WITH_FREE_THREADING:
with cython.critical_section(self):
self._add_index_reference_maybe_locked(index)
ELSE:
self._add_index_reference_maybe_locked(index)

def _has_reference_maybe_locked(self) -> bool:
self._clear_dead_references(force=True)
# Checking for more references than block pointing to itself
return len(self.referenced_blocks) > 1

def has_reference(self) -> bool:
"""Checks if block has foreign references.
Expand All @@ -951,6 +989,8 @@ cdef class BlockValuesRefs:
-------
bool
"""
self._clear_dead_references(force=True)
# Checking for more references than block pointing to itself
return len(self.referenced_blocks) > 1
IF CYTHON_COMPATIBLE_WITH_FREE_THREADING:
with cython.critical_section(self):
return self._has_reference_maybe_locked()
ELSE:
return self._has_reference_maybe_locked()
7 changes: 7 additions & 0 deletions pandas/_libs/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ _khash_primitive_helper_dep = declare_dependency(
sources: _khash_primitive_helper,
)

_free_threading_config = configure_file(
input: 'free_threading_config.pxi.in',
output: 'free_threading_config.pxi',
configuration: cdata,
install: false,
)

subdir('tslibs')

libs_sources = {
Expand Down
Loading