Skip to content

Commit d46aaa6

Browse files
committed
Merge branch 'main' into tyf/addSeed
2 parents 19c2871 + b0c04b7 commit d46aaa6

File tree

14 files changed

+115
-72
lines changed

14 files changed

+115
-72
lines changed

.github/workflows/main.yml

+15
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,21 @@ jobs:
353353
source scripts/ci/ascend/ci_ascend_env.sh
354354
bash scripts/ci/ascend/ci_ascend_script.sh build_dipu \
355355
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
356+
357+
Build-Ascend-910b-with-autocompare:
358+
name: Build-dipu-ascend-910b-with-autocompare
359+
needs: [Build-PyTorch-For-Ascend-910b]
360+
runs-on: tps-ascend-ci-910b
361+
steps:
362+
- name: Build dipu
363+
run: |
364+
set -ex
365+
export USE_COVERAGE=ON
366+
export USE_AUTOCOMPARE=ON
367+
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
368+
source scripts/ci/ascend/ci_ascend_env.sh
369+
bash scripts/ci/ascend/ci_ascend_script.sh build_dipu \
370+
|| ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
356371
357372
Test-Ascend-910b:
358373
name: Test-dipu-ascend-910b

dipu/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ set(CMAKE_CXX_STANDARD 17)
55
set(CMAKE_CXX_STANDARD_REQUIRED ON)
66
set(CMAKE_CXX_EXTENSIONS OFF)
77

8+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
9+
810
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "default build type is Release")
911
option(TESTS "Whether to build unit tests" OFF)
1012
option(LIBS "Whether to build dipu lib, default on" ON)

dipu/QuickStart.md

+6-15
Original file line numberDiff line numberDiff line change
@@ -183,21 +183,12 @@ python -c "import torch_dipu"
183183

184184
#### 算子精度自动对比功能介绍
185185

186-
由于该功能默认不开启,使用该功能时需要打开该功能并重新编译 DIPU。
187-
188-
如在寒武纪设备上,可将 `dipu/torch_dipu/csrc_dipu/CMakeLists.txt` 中的 `autocompare` 修改为 `True`
189-
190-
```cmake
191-
add_custom_command(
192-
OUTPUT "${DIPU_AUTOGENED_KERNELS_CPP}"
193-
COMMAND
194-
python "${DIPU_AUTOGEN_DIOPI_WRAPPER_SCRIPT}" --config
195-
"${DIPU_AUTOGEN_DIOPI_WRAPPER_CONFIG}" --out "${DIPU_AUTOGENED_KERNELS_CPP}"
196-
--use_diopi_adapter False --autocompare True --print_func_call_info True
197-
--print_op_arg True --fun_config_dict
198-
'{\"current_device\": \"${UsedVendor}\"}'
199-
DEPENDS ${DIPU_AUTOGEN_DIOPI_WRAPPER_SCRIPT}
200-
${DIPU_AUTOGEN_DIOPI_WRAPPER_CONFIG})
186+
由于该功能默认不开启,使用该功能时需要打开该功能并重新编译 DIPU。
187+
188+
可以通过设置环境变量 `USE_AUTOCOMPARE=ON` 来开启该功能,然后需要重新编译 DIPU。
189+
190+
```shell
191+
export USE_AUTOCOMPARE=ON
201192
```
202193

203194
以上方法是对所有算子开启自动精度对比。如果只需要对特定算子做精度对比,也可只给需要的算子做精度对比,只需要在相关的配置文件(如 `dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml`)给相应的算子添加 `autocompare: True` 即可。

dipu/SupportedDiopiFunctions.txt

+1
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ diopiSigmoidBackward
218218
diopiSign
219219
diopiSilu
220220
diopiSiluBackward
221+
diopiSiluInp
221222
diopiSin
222223
diopiSinInp
223224
diopiSmoothL1Loss

dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py

+25-16
Original file line numberDiff line numberDiff line change
@@ -114,36 +114,41 @@ def create_return_code_frome_schema(schema, allow_return_ref=True):
114114
def create_transform_input_to_cpu_code(fun_config):
115115
input_process_code = ""
116116
schema = fun_config["schema"]
117+
opname = get_op_name_from_schema(schema)
117118
inputs = re.findall("Tensor +([\w\d_]+)", schema[: schema.find("->")])
118119
for input in inputs:
119120
input_process_code += (
120-
f"at::Tensor {input}_cpu = to_cpu_without_diopi({input});\n"
121+
f"at::Tensor {input}_cpu = toCpuTensorWithoutDiopiCopy({input});\n"
121122
)
122123

123124
optional_inputs = re.findall("Tensor *\? +([\w\d_]+)", schema[: schema.find("->")])
124125
for input in optional_inputs:
125-
input_process_code += f"\nc10::optional<at::Tensor> {input}_cpu = {input}.has_value() && {input}.value().defined() ? c10::make_optional<at::Tensor>(to_cpu_without_diopi({input}.value())) : {input};\n"
126+
input_process_code += f"\nc10::optional<at::Tensor> {input}_cpu = {input}.has_value() && {input}.value().defined() ? c10::make_optional<at::Tensor>(toCpuTensorWithoutDiopiCopy({input}.value())) : {input};\n"
126127

127128
optional_tensor_list_inputs = re.findall(
128129
"Tensor *\? *\[ *\] +([\w\d_]+)", schema[: schema.find("->")]
129130
)
130131
for input in optional_tensor_list_inputs:
131132
input_process_code += f"\nc10::List<c10::optional<at::Tensor>> {input}_cpu;\n"
132133
input_process_code += f"for (int i = 0; i < {input}.size();++i)" + " {\n"
133-
input_process_code += f" {input}_cpu.push_back({input}[i].has_value() && {input}[i].value().defined() ? c10::make_optional<at::Tensor>(({input}[i].value())) : {input}[i]);\n"
134+
input_process_code += f" {input}_cpu.push_back({input}[i].has_value() && {input}[i].value().defined() ? c10::make_optional<at::Tensor>(toCpuTensorWithoutDiopiCopy({input}[i].value())) : {input}[i]);\n"
134135
input_process_code += "}\n"
135136

136137
outputs = re.findall(
137138
"Tensor\([a-z]!\)[ ]+([\w\d_]+){1}", schema[: schema.find("->")]
138139
)
139140
for output in outputs:
140-
if output.strip().endswith("?"):
141-
output = output.replace("?", "")
142-
input_process_code += f"\nc10::optional<at::Tensor> {output}_cpu = {output}.has_value() && {output}.value().defined() ? c10::make_optional<at::Tensor>(to_cpu_without_diopi({output}.value()) : {output};\n"
143-
else:
144-
input_process_code += (
145-
f"at::Tensor {output}_cpu = to_cpu_without_diopi({output});\n"
146-
)
141+
input_process_code += (
142+
f"at::Tensor {output}_cpu = toCpuTensorWithoutDiopiCopy({output});\n"
143+
)
144+
if ".out" in opname or "_out" in opname:
145+
for i in range(len(inputs)):
146+
input_process_code += (
147+
f"if (({inputs[i]}.data_ptr()) == {output}.data_ptr())"
148+
)
149+
input_process_code += "{\n\t"
150+
input_process_code += f"{inputs[i]}_cpu = {output}_cpu;\n\t"
151+
input_process_code += "}\n"
147152

148153
tensors_arrays = re.findall(
149154
"Tensor *\[ *\] * +([\w\d_]+)", schema[: schema.find("->")]
@@ -161,9 +166,8 @@ def create_transform_input_to_cpu_code(fun_config):
161166
)
162167
input_process_code += (
163168
f"std::transform({tensors_arg}.begin(), {tensors_arg}.end(), {tensors_arg}_cpu.begin(), [](const at::Tensor& tensor)"
164-
+ "{return to_cpu_without_diopi(tensor);});\n"
169+
+ "{return toCpuTensorWithoutDiopiCopy(tensor);});\n"
165170
)
166-
167171
return input_process_code
168172

169173

@@ -487,6 +491,9 @@ def create_call_aten_cpu_cpp_function_code_from_config(fun_config):
487491
code,
488492
)
489493

494+
if "device" in code:
495+
code = code.replace("device", "at::kCPU")
496+
490497
inputs = re.findall("Tensor +([\w\d_]+)", schema[: schema.find("->")])
491498
optional_inputs = re.findall("Tensor *\? +([\w\d_]+)", schema[: schema.find("->")])
492499
outputs = re.findall(
@@ -550,7 +557,6 @@ def create_result_compare_code(fun_config):
550557
for i in range(len(inputs)):
551558
code += separator_code
552559
code += f'std::cout << "autocompare:\t{op_name}\t{inputs[i]}: " << std::endl << allclose_autocompare({inputs[i]}_cpu, {inputs[i]}) << std::endl;\n'
553-
554560
return code
555561

556562

@@ -972,9 +978,12 @@ def functions_code_gen(fun_config):
972978

973979

974980
def boolean_string(s):
975-
if s not in {"False", "True"}:
976-
raise ValueError("Not a valid boolean string")
977-
return s == "True"
981+
if s.lower() in ["true", "on"]:
982+
return True
983+
elif s.lower() in ["false", "off"]:
984+
return False
985+
else:
986+
raise ValueError("Not a valid boolean string.")
978987

979988

980989
def parse_args():
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
3+
DIPU_DIR=$(dirname $(dirname $(dirname "$0")))
4+
USE_AUTOCOMPARE="$1"
5+
UsedVendor="$2"
6+
Torch_VERSION="$3"
7+
8+
GENERATED_KERNELS=${DIPU_DIR}/torch_dipu/csrc_dipu/aten/ops/AutoGenedKernels.cpp
9+
GENERATED_KERNELS_SCRIPT=${DIPU_DIR}/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py
10+
GENERATED_KERNELS_CONFIG=${DIPU_DIR}/scripts/autogen_diopi_wrapper/diopi_functions.yaml
11+
GENERATED_KERNELS_VENDOR=${DIPU_DIR}/third_party/DIOPI/impl/${UsedVendor}/convert_config.yaml
12+
13+
PYTHON_CMD="python3 ${GENERATED_KERNELS_SCRIPT} --out=${GENERATED_KERNELS} --config=${GENERATED_KERNELS_CONFIG} \
14+
--autocompare=${USE_AUTOCOMPARE} --print_op_arg=True --use_diopi_adapter=False --print_func_call_info=True \
15+
--fun_config_dict='{\"current_device\":\"${UsedVendor}\",\"current_torch_ver\":\"${Torch_VERSION}\"}'"
16+
17+
if [ -f "$GENERATED_KERNELS_VENDOR" ]; then
18+
PYTHON_CMD="$PYTHON_CMD --convert_config=${GENERATED_KERNELS_VENDOR}"
19+
fi
20+
21+
eval "$PYTHON_CMD"

dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -1648,6 +1648,9 @@
16481648
auto out = nodispatch::empty_like(self);
16491649
interface: diopiSiluBackward(ctx, out, grad_output, self)
16501650

1651+
- schema: "silu_(Tensor(a!) self) -> Tensor(a!)"
1652+
interface: diopiSiluInp(ctx, self)
1653+
16511654
- schema: "reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"
16521655
interface: diopiReciprocal(ctx, out, self)
16531656

dipu/scripts/ci/ascend/ci_ascend_script.sh

+30-24
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,53 @@ function build_diopi_lib() {
1010
}
1111

1212
function config_dipu_ascend_cmake() {
13-
mkdir -p build && cd ./build && rm -rf ./*
14-
cmake ../ -DCMAKE_BUILD_TYPE=Release \
15-
-DDEVICE=ascend \
16-
-DWITH_DIOPI_LIBRARY=DISABLE
13+
mkdir -p build && cd ./build
14+
cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DWITH_DIOPI_LIBRARY=DISABLE"
15+
if [ -n "$USE_AUTOCOMPARE" ]; then
16+
cmake_args+=" -DUSE_AUTOCOMPARE=${USE_AUTOCOMPARE}"
17+
fi
18+
cmake ../ $cmake_args
1719
cd ../
1820
}
1921

2022
function config_all_ascend_cmake() {
21-
mkdir -p build && cd ./build && rm -rf ./*
22-
cmake ../ -DCMAKE_BUILD_TYPE=Release \
23-
-DDEVICE=ascend \
24-
-DENABLE_COVERAGE=${USE_COVERAGE} \
25-
-DWITH_DIOPI=INTERNAL
23+
mkdir -p build && cd ./build
24+
cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DENABLE_COVERAGE=${USE_COVERAGE} -DWITH_DIOPI=INTERNAL"
25+
if [ -n "$USE_AUTOCOMPARE" ]; then
26+
cmake_args+=" -DUSE_AUTOCOMPARE=${USE_AUTOCOMPARE}"
27+
fi
28+
cmake ../ $cmake_args
2629
cd ../
2730
}
2831

2932
function build_dipu_without_diopi() {
3033
echo "building dipu_lib without diopi:$(pwd)"
3134
config_dipu_ascend_cmake 2>&1 | tee ./build1.log
32-
cd build && make -j8 2>&1 | tee ./build1.log && cd ..
35+
cd build && make -j8 2>&1 | tee ./build1.log && cd ..
3336
}
3437

3538
function build_all() {
3639
echo "building dipu_lib:$(pwd)"
37-
echo "DIOPI_ROOT:${DIOPI_ROOT}"
40+
echo "DIOPI_ROOT:${DIOPI_ROOT}"
3841
config_all_ascend_cmake 2>&1 | tee ./build1.log
39-
cd build && make -j8 2>&1 | tee ./build1.log && cd ..
42+
cd build && make -j8 2>&1 | tee ./build1.log && cd ..
4043
}
4144

4245
case $1 in
43-
build_dipu)
44-
(
45-
build_all
46-
) \
47-
|| exit -1;;
48-
build_dipu_without_diopi)
49-
(
50-
build_dipu_without_diopi
51-
) \
52-
|| exit -1;;
53-
*)
54-
echo -e "[ERROR] Incorrect option:" $1;
46+
build_dipu)
47+
(
48+
build_all
49+
) ||
50+
exit -1
51+
;;
52+
build_dipu_without_diopi)
53+
(
54+
build_dipu_without_diopi
55+
) ||
56+
exit -1
57+
;;
58+
*)
59+
echo -e "[ERROR] Incorrect option:" $1
60+
;;
5561
esac
5662
exit 0

dipu/scripts/op_capture/op_capture.py

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def extract_op_arg(op):
4848
name = arg[0:index].strip()
4949
attrs = arg[index + 1 :]
5050
attrs = re.sub(", *data_ptr: 0x[\da-f]+", "", attrs)
51+
attrs = re.sub(", *storage_data_ptr: 0x[\da-f]+", "", attrs)
5152
args_info.append(name + ":[" + attrs + "] ")
5253

5354
return args_info

dipu/torch_dipu/csrc_dipu/CMakeLists.txt

+6-14
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#[[ Dependencies ]]
2+
option(USE_AUTOCOMPARE "whether to use USE_AUTOCOMPARE" OFF)
23

34
# Import Python3::Python, Python3_EXECUTABLE
45
# Also see https://cmake.org/cmake/help/latest/module/FindPython3.html
@@ -43,9 +44,8 @@ endif()
4344

4445
# Auto generated code.
4546
# Consider moving it to other files.
47+
set(AUTOGEN_CODE_SH "${PROJECT_SOURCE_DIR}/scripts/autogen_diopi_wrapper/autogen_wrapped_code.sh")
4648
set(GENERATED_KERNELS "${CMAKE_CURRENT_SOURCE_DIR}/aten/ops/AutoGenedKernels.cpp")
47-
set(GENERATED_KERNELS_SCRIPT "${PROJECT_SOURCE_DIR}/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py")
48-
set(GENERATED_KERNELS_CONFIG "${PROJECT_SOURCE_DIR}/scripts/autogen_diopi_wrapper/diopi_functions.yaml")
4949
set(GENERATED_KERNELS_VENDOR "${PROJECT_SOURCE_DIR}/third_party/DIOPI/impl/${UsedVendor}/convert_config.yaml")
5050

5151
if(NOT EXISTS "${GENERATED_KERNELS_VENDOR}")
@@ -54,21 +54,13 @@ endif()
5454

5555
add_custom_command(
5656
OUTPUT "${GENERATED_KERNELS}"
57-
COMMAND "${Python3_EXECUTABLE}"
58-
"${GENERATED_KERNELS_SCRIPT}"
59-
"--out=${GENERATED_KERNELS}"
60-
"--config=${GENERATED_KERNELS_CONFIG}"
61-
"--autocompare=False"
62-
"--print_op_arg=True"
63-
"--use_diopi_adapter=False"
64-
"--print_func_call_info=True"
65-
"--fun_config_dict='{\"current_device\":\"${UsedVendor}\",\"current_torch_ver\":\"${Torch_VERSION}\"}'"
66-
"$<$<BOOL:${GENERATED_KERNELS_VENDOR}>:--convert_config=${GENERATED_KERNELS_VENDOR}>"
57+
COMMAND bash -c "${AUTOGEN_CODE_SH} ${USE_AUTOCOMPARE} ${UsedVendor} ${Torch_VERSION}"
58+
COMMENT "Generating ${GENERATED_KERNELS}$<$<BOOL:${GENERATED_KERNELS_VENDOR}>: with ${GENERATED_KERNELS_VENDOR}>"
6759
DEPENDS
6860
"${GENERATED_KERNELS_SCRIPT}"
6961
"${GENERATED_KERNELS_CONFIG}"
70-
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
71-
COMMENT "Generating ${GENERATED_KERNELS}$<$<BOOL:${GENERATED_KERNELS_VENDOR}>: with ${GENERATED_KERNELS_VENDOR}>")
62+
"${AUTOGEN_CODE_SH}"
63+
)
7264

7365
# Collect source files.
7466
# You may use: find . -name "*.cpp" to update list.

dipu/torch_dipu/csrc_dipu/aten/ops/OpUtils.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ inline at::Tensor toCpuTensorWithoutDiopiCopy(const at::Tensor& in) {
4242
at::Tensor out = at::empty_strided(in.sizes(), in.strides(),
4343
in.options().device(c10::Device("cpu")));
4444
if (in.nbytes() > 0) {
45+
dipu::getCurrentDIPUStream().synchronize();
4546
dipu::devapis::memCopyD2H(out.storage().nbytes(), out.data_ptr(),
4647
in.data_ptr());
4748
}

dipu/torch_dipu/csrc_dipu/vendor/droplet/deviceimpl.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ EventStatus getEventStatus(deviceEvent_t event) {
136136
::tangGetLastError(); /* reset internal error state*/
137137
return devapis::EventStatus::PENDING;
138138
} else {
139+
printf("call a tangrt function (tangEventQuery) failed. return code=%d",
140+
ret);
139141
throw std::runtime_error("dipu device error");
140142
}
141143
}

dipu/torch_dipu/dipu/__init__.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@
2929
# as "gpu" or "cuda" (mock cuda is another problem)
3030
# only partially aligned now,
3131
__all__ = [
32-
# resume initialize flag after random generator ready
33-
# "is_initialized",
3432
# device
3533
"can_device_access_peer",
3634
"current_device",
@@ -42,6 +40,7 @@
4240
"get_device_properties",
4341
"get_device_capability",
4442
"is_available",
43+
"is_initialized",
4544
"set_device",
4645
"GetDeviceProxy",
4746
"GetDeviceStaticProxy",

0 commit comments

Comments (0)