Skip to content

Commit 70ed3df

Browse files
zhaoguochun1995zhaoguochun1995
and
zhaoguochun1995
authored
Zgc/diopi ascend fix python gil bug (DeepLink-org#762)
* delete Python-related dependency * optimize the location of releasing the Python GIL * add Python.h dir to include path * rename gil_scoped_release to GilScopedRelease --------- Co-authored-by: zhaoguochun1995 <[email protected]>
1 parent d50f920 commit 70ed3df

File tree

6 files changed

+41
-19
lines changed

6 files changed

+41
-19
lines changed

diopi_test/diopi_stub/codegen/gen.py

100644100755
+1-5
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,7 @@ def gen_functions(options, functions_fm):
166166
with open(os.path.join(_cur_dir, options.get('source_dir'), 'functions.h'), 'r', encoding='utf8')as f:
167167
content = f.readlines()
168168
exports = []
169-
device = options.get('device')
170-
if device == 'ascend':
171-
ft = OT.function_ascend_template
172-
else:
173-
ft = OT.function_template
169+
ft = OT.function_template
174170
exports = get_export(content, ft, exports)
175171
with open(os.path.join(_cur_dir, options.get('source_dir'), 'functions_ext.h'), 'r', encoding='utf8')as f:
176172
content_ext = f.readlines()

diopi_test/diopi_stub/codegen/op_template.py

-14
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,6 @@ class OpTemplate(object):
4141
return diopiError_t::diopiNoImplement;
4242
}
4343
});
44-
""")
45-
46-
function_ascend_template = CodeTemplate("""\
47-
m.def("${func_name}", [](${attrs}) {
48-
if (${func_name}) {
49-
${convert}
50-
py::gil_scoped_release no_gil;
51-
diopiError_t ret = ${call_func};
52-
${out_copy}
53-
return ret;
54-
} else {
55-
return diopiError_t::diopiNoImplement;
56-
}
57-
});
5844
""")
5945

6046
vector_template = CodeTemplate("""\

impl/ascend/common/acloprunner.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "acl/acl_op.h"
2222
#include "acl/acl_op_compiler.h"
2323
#include "debug.hpp"
24+
#include "gil_scoped_release.hpp"
2425
#include "impl_functions.hpp"
2526
#include "utils.hpp"
2627

@@ -619,6 +620,7 @@ class AclOpRunner {
619620
AclOpRunner& run() {
620621
diopiStreamHandle_t stream;
621622
diopiGetStream(context_, &stream);
623+
diopi::GilScopedRelease gilReleaeGuard;
622624
if (sync_) {
623625
CALL_ACLRT(aclopCompileAndExecuteV2(opname_.data(),
624626
inputIndex_,
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once
2+
3+
#include "Python.h"
4+
5+
namespace diopi {
6+
7+
// https://docs.python.org/zh-cn/3/c-api/init.html?highlight=pygilstate_check
8+
9+
class GilScopedRelease {
10+
private:
11+
PyThreadState* state_ = nullptr;
12+
13+
public:
14+
GilScopedRelease() {
15+
if (PyGILState_Check()) {
16+
state_ = PyEval_SaveThread();
17+
}
18+
}
19+
20+
~GilScopedRelease() {
21+
if (state_ != nullptr) {
22+
PyEval_RestoreThread(state_);
23+
state_ = nullptr;
24+
}
25+
}
26+
};
27+
28+
} // namespace diopi

impl/ascend_npu/CMakeLists.txt

+8
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@ add_custom_target(op_plugin_gen
2121
add_subdirectory(third_party/acl)
2222
include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/third_party/acl/inc)
2323

24+
# Locate the directory containing Python.h and add it to the include path.
# A caller-supplied PYTHON_INCLUDE_DIR skips the lookup entirely.
if(NOT DEFINED PYTHON_INCLUDE_DIR)
    execute_process(
        # Search from the parent of the directory holding the `python` executable.
        # `head -n 1` keeps a single result when several Python.h files are found,
        # so the variable never holds a multi-line value.
        COMMAND sh -c "dirname $(find $(dirname $(which python))/../ -name Python.h | head -n 1)"
        OUTPUT_VARIABLE PYTHON_INCLUDE_DIR
        # Drop the trailing newline that the shell command prints after the path.
        OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
include_directories(SYSTEM ${PYTHON_INCLUDE_DIR})
message(STATUS "PYTHON_INCLUDE_DIR: " ${PYTHON_INCLUDE_DIR})
31+
2432
if(NOT DEFINED PYTORCH_DIR)
2533
execute_process(
2634
COMMAND sh -c "dirname $(python -c 'import torch;print(torch.__path__[0])')"

impl/ascend_npu/torch_npu/csrc/DIOPIAdapter.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <diopi/diopirt.h>
77
#include <torch/library.h>
88

9+
#include "../../../ascend/common/gil_scoped_release.hpp"
910
#include "diopi_impl/helper.hpp"
1011
#include "op_plugin/AclOpsInterface.h"
1112

@@ -2005,6 +2006,7 @@ class OpCommandImpl {
20052006
void OpCommandImpl::Run(bool sync, c10::SmallVector<int64_t, N>& sync_index, c10::SmallVector<at::Tensor, N>& outputTensor) {
20062007
NPU_LOGD("Op %s start run.", opName.c_str());
20072008
// RECORD_FUNCTION(opName, std::vector<c10::IValue>({}));
2009+
diopi::GilScopedRelease gilReleaeGuard;
20082010
ACL_REQUIRE_OK_OP(InnerRun(opName, execParam, sync, sync_index, outputTensor), opName.c_str());
20092011
NPU_LOGD("Op %s run over.", opName.c_str());
20102012
}

0 commit comments

Comments
 (0)