-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[lldb] Support the Python stable C API in PythonString::AsUTF8 #152212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This conditionally reimplements PythonString::AsUTF8 using PyUnicode_AsUTF8String instead of PyUnicode_AsUTF8AndSize. The latter is slightly more efficient because it caches the result, which also means we can return a StringRef instead of having to allocate a std::string. We pick the most efficient one depending on whether or not we're targeting the Python 3.10 stable API, however we now always return the result as a std::string.
@llvm/pr-subscribers-lldb Author: Jonas Devlieghere (JDevlieghere) ChangesThis conditionally reimplements We pick the most efficient one depending on whether or not we're targeting the Python 3.10 stable API, however we now always return the result as a Full diff: https://github.com/llvm/llvm-project/pull/152212.diff 4 Files Affected:
diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig
index 2c30d536a753d..7f00ef43913f8 100644
--- a/lldb/bindings/python/python-wrapper.swig
+++ b/lldb/bindings/python/python-wrapper.swig
@@ -173,7 +173,7 @@ bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallTypeScript(
else
result = pfunc(value_arg, dict, SWIGBridge::ToSWIGWrapper(*options_sp));
- retval = result.Str().GetString().str();
+ retval = result.Str().GetString();
return true;
}
@@ -604,7 +604,7 @@ lldb_private::python::SWIGBridge::LLDBSwigPythonGetRepeatCommandForScriptedComma
if (result.IsNone())
return std::nullopt;
- return result.Str().GetString().str();
+ return result.Str().GetString();
}
StructuredData::DictionarySP
@@ -807,7 +807,7 @@ bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordProcess(
auto result = pfunc(SWIGBridge::ToSWIGWrapper(process), dict);
- output = result.Str().GetString().str();
+ output = result.Str().GetString();
return true;
}
@@ -831,7 +831,7 @@ std::optional<std::string> lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(thread)), dict);
- return result.Str().GetString().str();
+ return result.Str().GetString();
}
bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
@@ -854,7 +854,7 @@ bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
auto result = pfunc(SWIGBridge::ToSWIGWrapper(target), dict);
- output = result.Str().GetString().str();
+ output = result.Str().GetString();
return true;
}
@@ -878,7 +878,7 @@ std::optional<std::string> lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(frame)), dict);
- return result.Str().GetString().str();
+ return result.Str().GetString();
}
bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
@@ -901,7 +901,7 @@ bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
auto result = pfunc(SWIGBridge::ToSWIGWrapper(value), dict);
- output = result.Str().GetString().str();
+ output = result.Str().GetString();
return true;
}
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 4a45673a6ebbc..7845e56c8c05c 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -405,28 +405,40 @@ bool PythonString::Check(PyObject *py_obj) {
return false;
}
-llvm::StringRef PythonString::GetString() const {
+std::string PythonString::GetString() const {
auto s = AsUTF8();
if (!s) {
llvm::consumeError(s.takeError());
- return llvm::StringRef("");
+ return "";
}
return s.get();
}
-Expected<llvm::StringRef> PythonString::AsUTF8() const {
+Expected<std::string> PythonString::AsUTF8() const {
if (!IsValid())
return nullDeref();
- Py_ssize_t size;
- const char *data;
+ // PyUnicode_AsUTF8AndSize caches the UTF-8 representation of the string in
+ // the Unicode object, making it more efficient than PyUnicode_AsUTF8String.
+#if defined(Py_LIMITED_API) && (Py_LIMITED_API < 0x030a0000)
+ PyObject *py_bytes = PyUnicode_AsUTF8String(m_py_obj);
+ if (!py_bytes)
+ return exception();
- data = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+ auto release_py_str =
+ llvm::make_scope_exit([py_bytes] { Py_DECREF(py_bytes); });
- if (!data)
+ Py_ssize_t size = PyBytes_Size(py_bytes);
+ const char *str = PyBytes_AsString(py_bytes);
+#else
+ Py_ssize_t size;
+ const char *str = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+#endif
+
+ if (!str)
return exception();
- return llvm::StringRef(data, size);
+ return std::string(str, size);
}
size_t PythonString::GetSize() const {
@@ -1248,12 +1260,12 @@ class TextPythonFile : public PythonIOFile {
// EOF
return Status();
}
- auto stringref = pystring.get().AsUTF8();
- if (!stringref)
+ auto str = pystring.get().AsUTF8();
+ if (!str)
// Cloning since the wrapped exception may still reference the PyThread.
- return Status::FromError(stringref.takeError()).Clone();
- num_bytes = stringref.get().size();
- memcpy(buf, stringref.get().begin(), num_bytes);
+ return Status::FromError(str.takeError()).Clone();
+ num_bytes = str->size();
+ memcpy(buf, str->c_str(), num_bytes);
return Status();
}
};
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
index 45bb499248329..9586eedd03176 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
@@ -460,9 +460,9 @@ class PythonString : public TypedPythonObject<PythonString> {
static bool Check(PyObject *py_obj);
- llvm::StringRef GetString() const; // safe, empty string on error
+ std::string GetString() const; // safe, empty string on error
- llvm::Expected<llvm::StringRef> AsUTF8() const;
+ llvm::Expected<std::string> AsUTF8() const;
size_t GetSize() const;
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 24d604f22a765..b98ff035e5798 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -2715,8 +2715,8 @@ bool ScriptInterpreterPythonImpl::GetShortHelpForCommandObject(
if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
PythonString py_string(PyRefType::Borrowed, py_return.get());
- llvm::StringRef return_data(py_string.GetString());
- dest.assign(return_data.data(), return_data.size());
+ std::string return_data = py_string.GetString();
+ dest.assign(return_data.c_str(), return_data.size());
return true;
}
@@ -2996,8 +2996,8 @@ bool ScriptInterpreterPythonImpl::GetLongHelpForCommandObject(
bool got_string = false;
if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
PythonString str(PyRefType::Borrowed, py_return.get());
- llvm::StringRef str_data(str.GetString());
- dest.assign(str_data.data(), str_data.size());
+ std::string str_data = str.GetString();
+ dest.assign(str_data.c_str(), str_data.size());
got_string = true;
}
|
Part of #151617 |
Ugh, changing the return type doesn't work because the SWIG typemaps assume the underlying string outlives the stack-allocated |
This conditionally reimplements
PythonString::AsUTF8
usingPyUnicode_AsUTF8String
instead ofPyUnicode_AsUTF8AndSize
. The latter is slightly more efficient because it caches the result, which also means we can return anllvm::StringRef
instead of having to allocate astd::string
.We pick the most efficient one depending on whether or not we're targeting the Python 3.10 stable API, however we now always return the result as a
std::string
.