
Commit d46aaa6

Merge branch 'main' into tyf/addSeed
2 parents: 19c2871 + b0c04b7

14 files changed (+115, -72 lines)

.github/workflows/main.yml

Lines changed: 15 additions & 0 deletions

```diff
@@ -353,6 +353,21 @@ jobs:
           source scripts/ci/ascend/ci_ascend_env.sh
           bash scripts/ci/ascend/ci_ascend_script.sh build_dipu \
           || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
+
+  Build-Ascend-910b-with-autocompare:
+    name: Build-dipu-ascend-910b-with-autocompare
+    needs: [Build-PyTorch-For-Ascend-910b]
+    runs-on: tps-ascend-ci-910b
+    steps:
+      - name: Build dipu
+        run: |
+          set -ex
+          export USE_COVERAGE=ON
+          export USE_AUTOCOMPARE=ON
+          cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && cp -R source ${GITHUB_JOB} && cd ${GITHUB_JOB}/dipu
+          source scripts/ci/ascend/ci_ascend_env.sh
+          bash scripts/ci/ascend/ci_ascend_script.sh build_dipu \
+          || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${GITHUB_JOB} && exit 1 )
 
   Test-Ascend-910b:
     name: Test-dipu-ascend-910b
```

dipu/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

```diff
@@ -5,6 +5,8 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
 set(CMAKE_BUILD_TYPE "Release" CACHE STRING "default build type is Release")
 option(TESTS "Whether to build unit tests" OFF)
 option(LIBS "Whether to build dipu lib, default on" ON)
```

dipu/QuickStart.md

Lines changed: 6 additions & 15 deletions

````diff
@@ -183,21 +183,12 @@ python -c "import torch_dipu"
 
 #### Introduction to the operator accuracy auto-compare feature
 
-Since this feature is disabled by default, it has to be enabled and DIPU recompiled before use.
-
-For example, on Cambricon devices, change `autocompare` to `True` in `dipu/torch_dipu/csrc_dipu/CMakeLists.txt`:
-
-```cmake
-add_custom_command(
-  OUTPUT "${DIPU_AUTOGENED_KERNELS_CPP}"
-  COMMAND
-    python "${DIPU_AUTOGEN_DIOPI_WRAPPER_SCRIPT}" --config
-    "${DIPU_AUTOGEN_DIOPI_WRAPPER_CONFIG}" --out "${DIPU_AUTOGENED_KERNELS_CPP}"
-    --use_diopi_adapter False --autocompare True --print_func_call_info True
-    --print_op_arg True --fun_config_dict
-    '{\"current_device\": \"${UsedVendor}\"}'
-  DEPENDS ${DIPU_AUTOGEN_DIOPI_WRAPPER_SCRIPT}
-    ${DIPU_AUTOGEN_DIOPI_WRAPPER_CONFIG})
+Since this feature is disabled by default, it has to be enabled and DIPU recompiled before use.
+
+The feature can be enabled by setting the environment variable USE_AUTOCOMPARE=ON and then rebuilding DIPU.
+
+```shell
+export USE_AUTOCOMPARE=ON
 ```
 
 The method above enables automatic accuracy comparison for all operators. To compare only specific operators, add `autocompare: True` to the relevant operator entries in the corresponding configuration file (e.g. `dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml`).
````

dipu/SupportedDiopiFunctions.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -218,6 +218,7 @@ diopiSigmoidBackward
 diopiSign
 diopiSilu
 diopiSiluBackward
+diopiSiluInp
 diopiSin
 diopiSinInp
 diopiSmoothL1Loss
```

dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py

Lines changed: 25 additions & 16 deletions

```diff
@@ -114,36 +114,41 @@ def create_return_code_frome_schema(schema, allow_return_ref=True):
 def create_transform_input_to_cpu_code(fun_config):
     input_process_code = ""
     schema = fun_config["schema"]
+    opname = get_op_name_from_schema(schema)
     inputs = re.findall("Tensor +([\w\d_]+)", schema[: schema.find("->")])
     for input in inputs:
         input_process_code += (
-            f"at::Tensor {input}_cpu = to_cpu_without_diopi({input});\n"
+            f"at::Tensor {input}_cpu = toCpuTensorWithoutDiopiCopy({input});\n"
         )
 
     optional_inputs = re.findall("Tensor *\? +([\w\d_]+)", schema[: schema.find("->")])
     for input in optional_inputs:
-        input_process_code += f"\nc10::optional<at::Tensor> {input}_cpu = {input}.has_value() && {input}.value().defined() ? c10::make_optional<at::Tensor>(to_cpu_without_diopi({input}.value())) : {input};\n"
+        input_process_code += f"\nc10::optional<at::Tensor> {input}_cpu = {input}.has_value() && {input}.value().defined() ? c10::make_optional<at::Tensor>(toCpuTensorWithoutDiopiCopy({input}.value())) : {input};\n"
 
     optional_tensor_list_inputs = re.findall(
         "Tensor *\? *\[ *\] +([\w\d_]+)", schema[: schema.find("->")]
     )
     for input in optional_tensor_list_inputs:
         input_process_code += f"\nc10::List<c10::optional<at::Tensor>> {input}_cpu;\n"
         input_process_code += f"for (int i = 0; i < {input}.size();++i)" + " {\n"
-        input_process_code += f"  {input}_cpu.push_back({input}[i].has_value() && {input}[i].value().defined() ? c10::make_optional<at::Tensor>(({input}[i].value())) : {input}[i]);\n"
+        input_process_code += f"  {input}_cpu.push_back({input}[i].has_value() && {input}[i].value().defined() ? c10::make_optional<at::Tensor>(toCpuTensorWithoutDiopiCopy({input}[i].value())) : {input}[i]);\n"
         input_process_code += "}\n"
 
     outputs = re.findall(
         "Tensor\([a-z]!\)[ ]+([\w\d_]+){1}", schema[: schema.find("->")]
     )
     for output in outputs:
-        if output.strip().endswith("?"):
-            output = output.replace("?", "")
-            input_process_code += f"\nc10::optional<at::Tensor> {output}_cpu = {output}.has_value() && {output}.value().defined() ? c10::make_optional<at::Tensor>(to_cpu_without_diopi({output}.value()) : {output};\n"
-        else:
-            input_process_code += (
-                f"at::Tensor {output}_cpu = to_cpu_without_diopi({output});\n"
-            )
+        input_process_code += (
+            f"at::Tensor {output}_cpu = toCpuTensorWithoutDiopiCopy({output});\n"
+        )
+        if ".out" in opname or "_out" in opname:
+            for i in range(len(inputs)):
+                input_process_code += (
+                    f"if (({inputs[i]}.data_ptr()) == {output}.data_ptr())"
+                )
+                input_process_code += "{\n\t"
+                input_process_code += f"{inputs[i]}_cpu = {output}_cpu;\n\t"
+                input_process_code += "}\n"
 
     tensors_arrays = re.findall(
         "Tensor *\[ *\] * +([\w\d_]+)", schema[: schema.find("->")]
@@ -161,9 +166,8 @@ def create_transform_input_to_cpu_code(fun_config):
         )
         input_process_code += (
             f"std::transform({tensors_arg}.begin(), {tensors_arg}.end(), {tensors_arg}_cpu.begin(), [](const at::Tensor& tensor)"
-            + "{return to_cpu_without_diopi(tensor);});\n"
+            + "{return toCpuTensorWithoutDiopiCopy(tensor);});\n"
         )
-
     return input_process_code
 
 
```
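The `opname` branch added above changes how the generated autocompare wrapper prepares its CPU reference for `.out`/`_out` variants: when an output tensor aliases an input on the device, the input's CPU copy is redirected to the output's CPU copy, so the reference computation sees the same aliasing as the device call. Below is a rough pure-PyTorch analogue of that rule, not the generated C++ itself; `cpu_reference_add_out`, `to_cpu_copy`, and the choice of `torch.add` are illustrative only.

```python
import torch


def to_cpu_copy(t):
    # Stand-in for the generated toCpuTensorWithoutDiopiCopy call: always
    # produce a separate CPU copy of the tensor.
    return t.to("cpu", copy=True)


def cpu_reference_add_out(x, y, out):
    x_cpu, y_cpu, out_cpu = to_cpu_copy(x), to_cpu_copy(y), to_cpu_copy(out)
    # Re-link aliases: if an input shares storage with `out` on the device,
    # reuse the CPU copy of `out` for that input too, mirroring the generated
    # `if ((input.data_ptr()) == out.data_ptr()) { input_cpu = out_cpu; }` code.
    if x.data_ptr() == out.data_ptr():
        x_cpu = out_cpu
    if y.data_ptr() == out.data_ptr():
        y_cpu = out_cpu
    torch.add(x_cpu, y_cpu, out=out_cpu)
    return out_cpu


x, y = torch.randn(3), torch.randn(3)
expected = x + y
result = cpu_reference_add_out(x, y, out=x)  # `out` aliases an input here
assert torch.allclose(result, expected)
```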

```diff
@@ -487,6 +491,9 @@ def create_call_aten_cpu_cpp_function_code_from_config(fun_config):
         code,
     )
 
+    if "device" in code:
+        code = code.replace("device", "at::kCPU")
+
     inputs = re.findall("Tensor +([\w\d_]+)", schema[: schema.find("->")])
     optional_inputs = re.findall("Tensor *\? +([\w\d_]+)", schema[: schema.find("->")])
     outputs = re.findall(
@@ -550,7 +557,6 @@ def create_result_compare_code(fun_config):
     for i in range(len(inputs)):
         code += separator_code
         code += f'std::cout << "autocompare:\t{op_name}\t{inputs[i]}: " << std::endl << allclose_autocompare({inputs[i]}_cpu, {inputs[i]}) << std::endl;\n'
-
     return code
 
 
@@ -972,9 +978,12 @@ def functions_code_gen(fun_config):
 
 
 def boolean_string(s):
-    if s not in {"False", "True"}:
-        raise ValueError("Not a valid boolean string")
-    return s == "True"
+    if s.lower() in ["true", "on"]:
+        return True
+    elif s.lower() in ["false", "off"]:
+        return False
+    else:
+        raise ValueError("Not a valid boolean string.")
 
 
 def parse_args():
```
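The relaxed `boolean_string` is what allows CMake and the CI to pass `ON`/`OFF` values (for example `--autocompare=${USE_AUTOCOMPARE}`) in addition to `True`/`False`. A minimal standalone sketch of the behavior; the `--autocompare` wiring below is illustrative, the real script defines its own argument set:

```python
import argparse


def boolean_string(s):
    # Same logic as the updated helper: accept true/false and on/off,
    # case-insensitively.
    if s.lower() in ["true", "on"]:
        return True
    elif s.lower() in ["false", "off"]:
        return False
    else:
        raise ValueError("Not a valid boolean string.")


parser = argparse.ArgumentParser()
parser.add_argument("--autocompare", type=boolean_string, default=False)

print(parser.parse_args(["--autocompare=ON"]).autocompare)     # True
print(parser.parse_args(["--autocompare=False"]).autocompare)  # False
```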
New file (shell script): 21 additions & 0 deletions

```diff
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+DIPU_DIR=$(dirname $(dirname $(dirname "$0")))
+USE_AUTOCOMPARE="$1"
+UsedVendor="$2"
+Torch_VERSION="$3"
+
+GENERATED_KERNELS=${DIPU_DIR}/torch_dipu/csrc_dipu/aten/ops/AutoGenedKernels.cpp
+GENERATED_KERNELS_SCRIPT=${DIPU_DIR}/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py
+GENERATED_KERNELS_CONFIG=${DIPU_DIR}/scripts/autogen_diopi_wrapper/diopi_functions.yaml
+GENERATED_KERNELS_VENDOR=${DIPU_DIR}/third_party/DIOPI/impl/${UsedVendor}/convert_config.yaml
+
+PYTHON_CMD="python3 ${GENERATED_KERNELS_SCRIPT} --out=${GENERATED_KERNELS} --config=${GENERATED_KERNELS_CONFIG} \
+--autocompare=${USE_AUTOCOMPARE} --print_op_arg=True --use_diopi_adapter=False --print_func_call_info=True \
+--fun_config_dict='{\"current_device\":\"${UsedVendor}\",\"current_torch_ver\":\"${Torch_VERSION}\"}'"
+
+if [ -f "$GENERATED_KERNELS_VENDOR" ]; then
+    PYTHON_CMD="$PYTHON_CMD --convert_config=${GENERATED_KERNELS_VENDOR}"
+fi
+
+eval "$PYTHON_CMD"
```
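The script only assembles a single `python3` invocation of the wrapper generator. For reference, a rough Python equivalent of the command it builds; the vendor and torch-version values are placeholders for the script's `$2`/`$3` arguments, and the relative paths assume the working directory is the `dipu/` source root:

```python
import json
import subprocess

dipu_dir = "."            # assumed: run from the dipu/ source root
vendor = "ascend"         # placeholder for "$2" (UsedVendor)
torch_version = "2.0.0"   # placeholder for "$3" (Torch_VERSION)

cmd = [
    "python3",
    f"{dipu_dir}/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py",
    f"--out={dipu_dir}/torch_dipu/csrc_dipu/aten/ops/AutoGenedKernels.cpp",
    f"--config={dipu_dir}/scripts/autogen_diopi_wrapper/diopi_functions.yaml",
    "--autocompare=ON",   # "$1" (USE_AUTOCOMPARE); ON/OFF parse via boolean_string
    "--print_op_arg=True",
    "--use_diopi_adapter=False",
    "--print_func_call_info=True",
    "--fun_config_dict="
    + json.dumps({"current_device": vendor, "current_torch_ver": torch_version}),
]
# --convert_config=.../convert_config.yaml is appended only if that file exists.
subprocess.run(cmd, check=True)
```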

dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml

Lines changed: 3 additions & 0 deletions

```diff
@@ -1648,6 +1648,9 @@
     auto out = nodispatch::empty_like(self);
   interface: diopiSiluBackward(ctx, out, grad_output, self)
 
+- schema: "silu_(Tensor(a!) self) -> Tensor(a!)"
+  interface: diopiSiluInp(ctx, self)
+
 - schema: "reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"
   interface: diopiReciprocal(ctx, out, self)
 
```
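The new entry maps PyTorch's in-place SiLU (`silu_`) to `diopiSiluInp`. As a CPU-only sanity sketch of the relationship the autocompare machinery would also check on a DIPU device (device setup via `import torch_dipu` is assumed and not shown here):

```python
import torch
import torch.nn.functional as F

x = torch.randn(4, 8)
reference = F.silu(x)    # out-of-place SiLU (existing diopiSilu binding)
F.silu(x, inplace=True)  # in-place SiLU; on a DIPU device this dispatches to silu_ -> diopiSiluInp

# The in-place result should match the out-of-place reference.
assert torch.allclose(x, reference)
```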

dipu/scripts/ci/ascend/ci_ascend_script.sh

Lines changed: 30 additions & 24 deletions

```diff
@@ -10,47 +10,53 @@ function build_diopi_lib() {
 }
 
 function config_dipu_ascend_cmake() {
-    mkdir -p build && cd ./build && rm -rf ./*
-    cmake ../ -DCMAKE_BUILD_TYPE=Release \
-        -DDEVICE=ascend \
-        -DWITH_DIOPI_LIBRARY=DISABLE
+    mkdir -p build && cd ./build
+    cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DWITH_DIOPI_LIBRARY=DISABLE"
+    if [ -n "$USE_AUTOCOMPARE" ]; then
+        cmake_args+=" -DUSE_AUTOCOMPARE=${USE_AUTOCOMPARE}"
+    fi
+    cmake ../ $cmake_args
     cd ../
 }
 
 function config_all_ascend_cmake() {
-    mkdir -p build && cd ./build && rm -rf ./*
-    cmake ../ -DCMAKE_BUILD_TYPE=Release \
-        -DDEVICE=ascend \
-        -DENABLE_COVERAGE=${USE_COVERAGE} \
-        -DWITH_DIOPI=INTERNAL
+    mkdir -p build && cd ./build
+    cmake_args="-DCMAKE_BUILD_TYPE=Release -DDEVICE=ascend -DENABLE_COVERAGE=${USE_COVERAGE} -DWITH_DIOPI=INTERNAL"
+    if [ -n "$USE_AUTOCOMPARE" ]; then
+        cmake_args+=" -DUSE_AUTOCOMPARE=${USE_AUTOCOMPARE}"
+    fi
+    cmake ../ $cmake_args
     cd ../
 }
 
 function build_dipu_without_diopi() {
     echo "building dipu_lib without diopi:$(pwd)"
     config_dipu_ascend_cmake 2>&1 | tee ./build1.log
-    cd build && make -j8 2>&1 | tee ./build1.log && cd ..
+    cd build && make -j8 2>&1 | tee ./build1.log && cd ..
 }
 
 function build_all() {
     echo "building dipu_lib:$(pwd)"
-    echo "DIOPI_ROOT:${DIOPI_ROOT}"
+    echo "DIOPI_ROOT:${DIOPI_ROOT}"
     config_all_ascend_cmake 2>&1 | tee ./build1.log
-    cd build && make -j8 2>&1 | tee ./build1.log && cd ..
+    cd build && make -j8 2>&1 | tee ./build1.log && cd ..
 }
 
 case $1 in
-    build_dipu)
-        (
-            build_all
-        ) \
-        || exit -1;;
-    build_dipu_without_diopi)
-        (
-            build_dipu_without_diopi
-        ) \
-        || exit -1;;
-    *)
-        echo -e "[ERROR] Incorrect option:" $1;
+    build_dipu)
+        (
+            build_all
+        ) ||
+            exit -1
+        ;;
+    build_dipu_without_diopi)
+        (
+            build_dipu_without_diopi
+        ) ||
+            exit -1
+        ;;
+    *)
+        echo -e "[ERROR] Incorrect option:" $1
+        ;;
 esac
 exit 0
```

dipu/scripts/op_capture/op_capture.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -48,6 +48,7 @@ def extract_op_arg(op):
         name = arg[0:index].strip()
         attrs = arg[index + 1 :]
         attrs = re.sub(", *data_ptr: 0x[\da-f]+", "", attrs)
+        attrs = re.sub(", *storage_data_ptr: 0x[\da-f]+", "", attrs)
         args_info.append(name + ":[" + attrs + "] ")
 
     return args_info
```
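The extra `re.sub` strips the `storage_data_ptr` field (alongside the existing `data_ptr` one) so that captured operator arguments remain comparable across runs. A small standalone sketch; the sample `attrs` string below is hypothetical and only mimics the captured format:

```python
import re

# Hypothetical attribute string as it might appear in a captured op log.
attrs = "shape: [2, 3], dtype: float32, data_ptr: 0x7f3a80000000, storage_data_ptr: 0x7f3a80000000"

# Same two substitutions as in extract_op_arg (raw strings used here).
attrs = re.sub(r", *data_ptr: 0x[\da-f]+", "", attrs)
attrs = re.sub(r", *storage_data_ptr: 0x[\da-f]+", "", attrs)

print(attrs)  # shape: [2, 3], dtype: float32
```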
