Skip to content
This repository was archived by the owner on Dec 24, 2024. It is now read-only.
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit de7048c

Browse files
committed Aug 30, 2024 ·
[WIP] AIE CI tests
1 parent f505444 commit de7048c

8 files changed

+1913
-0
lines changed
 

‎.github/workflows/ci-linux.yml

+203
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
# Builds the ROCR runtime distribution on Linux, publishes it as an artifact,
# then builds and runs the AIE test suite against it on an AIE-capable runner.
name: CI Linux

on:
  workflow_call:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - main

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit).
  group: ci-build-test-cpp-linux-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  build:
    name: Build (linux)
    strategy:
      fail-fast: false
      matrix:
        runs-on:
          - ubuntu-22.04
          # - nod-ai-shared-cpubuilder-manylinux-x86_64
    runs-on: ${{ matrix.runs-on }}
    env:
      CACHE_DIR: ${{ github.workspace }}/.container-cache
      CACHE_KEY: linux-build-manylinux-v1-${{ github.event.number || format('{0}-{1}', github.ref_name, github.run_number) }}
    steps:
      - name: Install tmate
        if: ${{ matrix.runs-on == 'nod-ai-shared-cpubuilder-manylinux-x86_64' }}
        run: dnf install -y epel-release && dnf install -y tmate

      - name: Set unified TZ
        uses: szenius/set-timezone@v2.0
        with:
          # this is an arbitrary choice
          timezoneLinux: "Asia/Singapore"
          timezoneMacos: "Asia/Singapore"
          timezoneWindows: "Singapore Standard Time"

      - name: Configure local git mirrors
        if: ${{ matrix.runs-on == 'nod-ai-shared-cpubuilder-manylinux-x86_64' }}
        run: |
          /gitmirror/scripts/trigger_update_mirrors.sh
          /gitmirror/scripts/git_config.sh

      - name: Install deps (manylinux)
        if: ${{ matrix.runs-on == 'nod-ai-shared-cpubuilder-manylinux-x86_64' }}
        run: |
          dnf install -y almalinux-release-devel
          yum install -y elfutils-libelf-devel p7zip p7zip-plugins

      - name: Install deps (ubuntu)
        if: ${{ matrix.runs-on == 'ubuntu-22.04' }}
        run: |
          sudo apt install -y libelf-dev libnuma-dev libdrm-dev

      - name: Setup Cpp
        if: ${{ matrix.runs-on == 'ubuntu-22.04' }}
        uses: aminya/setup-cpp@v1
        with:
          compiler: llvm-18
          cmake: true
          ninja: true
          ccache: true

      - name: "Checking out repository"
        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3  # v3.5.0
        with:
          submodules: recursive

      - name: Enable cache
        uses: actions/cache/restore@v3
        with:
          path: ${{ env.CACHE_DIR }}
          key: ${{ env.CACHE_KEY }}
          restore-keys: linux-build-

      - name: Configure ccache
        run: |
          cache_dir="${{ env.CACHE_DIR }}"
          if [ -z "${cache_dir}" ]; then
            cache_dir="${{ github.workspace }}/.build-cache"
            mkdir -p "${cache_dir}"
            cache_dir="$(cd "${cache_dir}" && pwd)"
          fi
          echo "Caching to ${cache_dir}"
          mkdir -p "${cache_dir}/ccache"
          # Later steps only see variables written to $GITHUB_ENV as NAME=value
          # lines. `export NAME=value` prints nothing, so it must be `echo`.
          {
            echo "CCACHE_DIR=${cache_dir}/ccache"
            echo "CCACHE_MAXSIZE=700M"
            echo "CMAKE_C_COMPILER_LAUNCHER=ccache"
            echo "CMAKE_CXX_COMPILER_LAUNCHER=ccache"
            # `clang --version` spans several lines, which requires the
            # delimiter form of the environment file syntax.
            echo "CCACHE_COMPILERCHECK<<CCACHE_COMPILERCHECK_EOF"
            echo "string:$(clang --version)"
            echo "CCACHE_COMPILERCHECK_EOF"
          } >> "$GITHUB_ENV"

      - name: Build and install libnuma
        if: ${{ matrix.runs-on == 'nod-ai-shared-cpubuilder-manylinux-x86_64' }}
        run: |
          curl --silent -L \
            https://github.com/numactl/numactl/releases/download/v2.0.18/numactl-2.0.18.tar.gz \
            -o numactl-2.0.18.tar.gz
          tar -xf numactl-2.0.18.tar.gz
          pushd numactl-2.0.18
          ./configure
          make install
          popd

      - name: Hack ROCR
        run: |
          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/blit_shaders/CMakeLists.txt
          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/trap_handler/CMakeLists.txt
          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/image/blit_src/CMakeLists.txt

      - name: Build ROCR distro
        run: |
          rocr_dir="$(cd ${{ github.workspace }} && pwd)"
          build_rocr_dir="${{ github.workspace }}/rocr-build"
          mkdir -p "$build_rocr_dir"
          build_rocr_dir="$(cd $build_rocr_dir && pwd)"
          rocr_install_dir="${{ github.workspace }}/rocr-install"

          cmake -GNinja \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \
            -DClang_DIR=/usr/lib/llvm-18/lib/cmake/clang \
            -DLLVM_DIR=/usr/lib/llvm-18/lib/cmake/llvm \
            -DIMAGE_SUPPORT=OFF \
            -S "$rocr_dir" -B "$build_rocr_dir"
          cmake --build "$build_rocr_dir" --target install

      - name: Create artifacts
        if: ${{ !cancelled() }}
        run: |
          tar -cf rocr-$(git rev-parse --short HEAD).tar rocr-install

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        if: ${{ !cancelled() }}
        with:
          name: linux_x86_64_distro
          path: rocr-*.tar
          if-no-files-found: warn

      - name: Save cache
        uses: actions/cache/save@v3
        if: ${{ !cancelled() }}
        with:
          path: ${{ env.CACHE_DIR }}
          key: ${{ env.CACHE_KEY }}

      - name: Setup tmate session
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
        with:
          limit-access-to-actor: true
          install-dependencies: ${{ matrix.runs-on == 'ubuntu-22.04' }}

  test_aie:
    name: AIE tests
    needs: build
    strategy:
      fail-fast: false
      matrix:
        runs-on: [linux-phoenix]
    runs-on: ${{ matrix.runs-on }}
    steps:
      - name: "Checking out repository"
        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          name: linux_x86_64_distro

      - name: Extract artifact
        run: |
          mkdir -p rocr-install
          tar -xf rocr-*.tar
          # Written as NAME=value so the build step below can read it.
          echo "hsa_runtime64_ROOT=$PWD/rocr-install" >> "$GITHUB_ENV"

      - name: Build AIE test suite
        run: |
          pushd rocrtst/suites/aie
          build_dir="$PWD/build"
          mkdir -p $build_dir
          cmake -GNinja \
            -DCMAKE_BUILD_TYPE=Release \
            -Dhsa_runtime64_ROOT="${hsa_runtime64_ROOT}" \
            -S "$PWD" -B "$build_dir"
          cmake --build "$build_dir" --target \
            aie_hsa_bare_add_one \
            aie_hsa_dispatch_test
          popd

      - name: Run AIE test suite
        run: |
          pushd rocrtst/suites/aie
          build_dir="$PWD/build"
          # build_dir is already absolute; prefixing it with "./" would turn
          # it into a non-existent relative path.
          "$build_dir"/aie_hsa_bare_add_one "$PWD"
          "$build_dir"/aie_hsa_dispatch_test "$PWD"

‎rocrtst/suites/aie/CMakeLists.txt

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# This directory is configured directly as the top-level source tree
# (`cmake -S rocrtst/suites/aie`), so it must declare its own minimum CMake
# version and project. 3.12 is the first release where find_package() honors
# <PackageName>_ROOT variables (policy CMP0074).
cmake_minimum_required(VERSION 3.12)
project(aie_tests)

# find_package() derives its root hint from the package name, i.e.
# `hsa-runtime64_ROOT`. Callers that pass the underscore spelling
# (`-Dhsa_runtime64_ROOT=<prefix>`) are supported by adding that prefix to the
# search path explicitly.
if(DEFINED hsa_runtime64_ROOT)
  list(APPEND CMAKE_PREFIX_PATH "${hsa_runtime64_ROOT}")
endif()

find_package(hsa-runtime64 CONFIG REQUIRED NAMES hsa_runtime64)

# smoke test
add_executable(aie_hsa_bare_add_one aie_hsa_bare_add_one.cc)

# hsa test
add_executable(aie_hsa_dispatch_test aie_hsa_dispatch_test.cc)
target_link_libraries(aie_hsa_dispatch_test PUBLIC hsa-runtime64::hsa-runtime64)

‎rocrtst/suites/aie/add_one.pdi

3.47 KB
Binary file not shown.

‎rocrtst/suites/aie/add_one_insts.txt

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
06030100
2+
00000105
3+
00000007
4+
00000110
5+
00000001
6+
00000000
7+
0001D000
8+
00000030
9+
00000400
10+
00000000
11+
00000000
12+
00000000
13+
80000000
14+
00000000
15+
00000000
16+
02000000
17+
00000081
18+
00000030
19+
00000000
20+
00000000
21+
00000000
22+
00000000
23+
0001D004
24+
00000000
25+
00000001
26+
00000000
27+
00000000
28+
00000000
29+
00000000
30+
00000000
31+
0001D204
32+
00000000
33+
80000000
34+
00000018
35+
00000001
36+
00000000
37+
0001D020
38+
00000030
39+
00000400
40+
00000000
41+
00000000
42+
00000000
43+
80000000
44+
00000000
45+
00000000
46+
02000000
47+
00000081
48+
00000030
49+
00000000
50+
00000000
51+
00000000
52+
00000000
53+
0001D024
54+
00000000
55+
00000000
56+
00000000
57+
00000000
58+
00000000
59+
00000000
60+
00000000
61+
0001D214
62+
00000000
63+
00000001
64+
00000018
65+
00000080
66+
00000010
67+
00000000
68+
00010100

‎rocrtst/suites/aie/aie_hsa_bare_add_one.cc

+485
Large diffs are not rendered by default.
+309
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
// Copyright 2024 The IREE Authors
2+
//
3+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include <sys/mman.h>

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
16+
17+
#include "hsa/hsa.h"
18+
#include "hsa/hsa_ext_amd.h"
19+
20+
namespace {
21+
22+
// Appends `agent` to `*agents` when its device type equals
// `requested_dev_type`.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when `agents` is null or the
// requested type is not one of AIE/GPU/CPU; otherwise returns the status of
// the hsa_agent_get_info() device-type query.
hsa_status_t get_agent(hsa_agent_t agent, std::vector<hsa_agent_t> *agents,
                       hsa_device_type_t requested_dev_type) {
  const bool known_type = requested_dev_type == HSA_DEVICE_TYPE_AIE ||
                          requested_dev_type == HSA_DEVICE_TYPE_GPU ||
                          requested_dev_type == HSA_DEVICE_TYPE_CPU;
  if (agents == nullptr || !known_type) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t actual_type;
  const hsa_status_t status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &actual_type);

  // Only trust `actual_type` when the query succeeded.
  if (status == HSA_STATUS_SUCCESS && actual_type == requested_dev_type) {
    agents->push_back(agent);
  }

  return status;
}
44+
45+
// Agent-iteration callback that collects AIE agents.
//
// `data` must point to a std::vector<hsa_agent_t>; a null `data` yields
// HSA_STATUS_ERROR_INVALID_ARGUMENT. Otherwise the result of get_agent() for
// HSA_DEVICE_TYPE_AIE is returned.
hsa_status_t get_aie_agents(hsa_agent_t agent, void *data) {
  if (data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return get_agent(agent, static_cast<std::vector<hsa_agent_t> *>(data),
                   HSA_DEVICE_TYPE_AIE);
}
53+
54+
// Stores `pool` into `*data` (interpreted as hsa_amd_memory_pool_t *) when
// the pool is in the global segment, is coarse grained, and its kernarg flag
// matches `kernarg` (set when `kernarg` is true, clear when it is false).
//
// Pools that do not match are skipped and HSA_STATUS_SUCCESS is returned; a
// failing pool-info query returns that query's status. `data` is not checked
// for null. A later matching pool overwrites an earlier one.
hsa_status_t get_coarse_global_mem_pool(hsa_amd_memory_pool_t pool, void *data,
                                        bool kernarg) {
  hsa_amd_segment_t segment;
  hsa_status_t status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Non-global pools are never candidates.
  if (segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  hsa_amd_memory_pool_global_flag_t flags;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  const bool coarse_grained =
      (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) != 0;
  const bool has_kernarg = (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) != 0;
  if (coarse_grained && has_kernarg == kernarg) {
    *static_cast<hsa_amd_memory_pool_t *>(data) = pool;
  }

  return HSA_STATUS_SUCCESS;
}
88+
89+
// Memory-pool callback selecting a coarse-grained global pool whose kernarg
// flag is NOT set. `data` receives the pool (see get_coarse_global_mem_pool).
hsa_status_t get_coarse_global_dev_mem_pool(hsa_amd_memory_pool_t pool,
                                            void *data) {
  constexpr bool want_kernarg = false;
  return get_coarse_global_mem_pool(pool, data, want_kernarg);
}
93+
94+
// Memory-pool callback selecting a coarse-grained global pool whose kernarg
// flag IS set. `data` receives the pool (see get_coarse_global_mem_pool).
hsa_status_t get_coarse_global_kernarg_mem_pool(hsa_amd_memory_pool_t pool,
                                                void *data) {
  constexpr bool want_kernarg = true;
  return get_coarse_global_mem_pool(pool, data, want_kernarg);
}
98+
99+
// Reads the binary file `file_name` into a buffer freshly allocated from
// `mem_pool` and stores the buffer address in `*buf`.
//
// Any failure (file cannot be opened, is empty or cannot be sized, the pool
// allocation fails, or the read is short) prints a message to stderr and
// terminates the process with EXIT_FAILURE. The checks are explicit rather
// than assert()-based so they stay active when NDEBUG is defined.
void load_pdi_file(hsa_amd_memory_pool_t mem_pool, const std::string &file_name,
                   void **buf) {
  // Open positioned at the end so tellg() reports the file size.
  std::ifstream bin_file(file_name,
                         std::ios::binary | std::ios::ate | std::ios::in);
  if (bin_file.fail()) {
    std::cerr << "FAIL: cannot open " << file_name << "\n";
    std::exit(EXIT_FAILURE);
  }

  // tellg() reports -1 on error; an empty file is rejected as well.
  const std::streamoff size = bin_file.tellg();
  if (size <= 0) {
    std::cerr << "FAIL: cannot determine size of " << file_name << "\n";
    std::exit(EXIT_FAILURE);
  }

  bin_file.seekg(0, std::ios::beg);
  const auto r = hsa_amd_memory_pool_allocate(
      mem_pool, static_cast<std::size_t>(size), 0, buf);
  if (r != HSA_STATUS_SUCCESS) {
    std::cerr << "FAIL: cannot allocate " << size << " bytes for "
              << file_name << "\n";
    std::exit(EXIT_FAILURE);
  }

  bin_file.read(reinterpret_cast<char *>(*buf), size);
  if (bin_file.gcount() != size) {
    std::cerr << "FAIL: short read from " << file_name << "\n";
    std::exit(EXIT_FAILURE);
  }
}
113+
114+
// Parses the text file `file_name` as whitespace-separated hexadecimal words,
// copies them as 32-bit values into a buffer freshly allocated from
// `mem_pool`, and stores the buffer address in `*buf`.
//
// The buffer is sized from the number of decoded words, so it always holds
// exactly what is copied into it regardless of how the text is formatted.
//
// Any failure (file cannot be opened, contains no words, or the pool
// allocation fails) prints a message to stderr and terminates the process
// with EXIT_FAILURE; these checks stay active when NDEBUG is defined.
// std::stoul throws if a token is not a valid hexadecimal number.
void load_dpu_file(hsa_amd_memory_pool_t mem_pool, const std::string &file_name,
                   void **buf) {
  std::ifstream bin_file(file_name, std::ios::binary | std::ios::in);
  if (bin_file.fail()) {
    std::cerr << "FAIL: cannot open " << file_name << "\n";
    std::exit(EXIT_FAILURE);
  }

  std::vector<uint32_t> pdi_vec;
  std::string val;
  while (bin_file >> val) {
    pdi_vec.push_back(std::stoul(val, nullptr, 16));
  }
  if (pdi_vec.empty()) {
    std::cerr << "FAIL: no instructions found in " << file_name << "\n";
    std::exit(EXIT_FAILURE);
  }

  const std::size_t num_bytes = pdi_vec.size() * sizeof(uint32_t);
  const auto r = hsa_amd_memory_pool_allocate(mem_pool, num_bytes, 0, buf);
  if (r != HSA_STATUS_SUCCESS) {
    std::cerr << "FAIL: cannot allocate " << num_bytes << " bytes for "
              << file_name << "\n";
    std::exit(EXIT_FAILURE);
  }
  std::memcpy(*buf, pdi_vec.data(), num_bytes);
}
133+
134+
} // namespace
135+
136+
int main(int argc, char **argv) {
137+
std::filesystem::path sourcePath(argv[1]);
138+
// List of AIE agents in the system.
139+
std::vector<hsa_agent_t> aie_agents;
140+
// For creating a queue on an AIE agent.
141+
hsa_queue_t *aie_queue(nullptr);
142+
// Memory pool for allocating device-mapped memory. Used for PDI/DPU
143+
// instructions.
144+
hsa_amd_memory_pool_t global_dev_mem_pool{0};
145+
// System memory pool. Used for allocating kernel argument data.
146+
hsa_amd_memory_pool_t global_kernarg_mem_pool{0};
147+
const std::string dpu_inst_file_name(sourcePath / "add_one_insts.txt");
148+
const std::string pdi_file_name(sourcePath / "add_one.pdi");
149+
uint32_t *dpu_inst_buf(nullptr);
150+
uint64_t *pdi_buf(nullptr);
151+
152+
assert(aie_agents.empty());
153+
assert(global_dev_mem_pool.handle == 0);
154+
assert(global_kernarg_mem_pool.handle == 0);
155+
156+
// Initialize the runtime.
157+
auto r = hsa_init();
158+
assert(r == HSA_STATUS_SUCCESS);
159+
160+
assert(sizeof(hsa_kernel_dispatch_packet_s) ==
161+
sizeof(hsa_amd_aie_ert_packet_s));
162+
163+
// Test a launch of an AIE kernel using the HSA API.
164+
// Find the AIE agents in the system.
165+
r = hsa_iterate_agents(get_aie_agents, &aie_agents);
166+
assert(r == HSA_STATUS_SUCCESS);
167+
// assert(hsa_iterate_agents(get_cpu_agents, &aie_agents) ==
168+
// HSA_STATUS_SUCCESS);
169+
assert(aie_agents.size() == 1);
170+
171+
const auto &aie_agent = aie_agents.front();
172+
173+
// Create a queue on the first agent.
174+
r = hsa_queue_create(aie_agent, 64, HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr,
175+
0, 0, &aie_queue);
176+
assert(r == HSA_STATUS_SUCCESS);
177+
assert(aie_queue);
178+
assert(aie_queue->base_address);
179+
180+
// Find a pool for DEV BOs. This is a global system memory pool that is
181+
// mapped to the device. Will be used for PDIs and DPU instructions.
182+
r = hsa_amd_agent_iterate_memory_pools(
183+
aie_agent, get_coarse_global_dev_mem_pool, &global_dev_mem_pool);
184+
assert(r == HSA_STATUS_SUCCESS);
185+
186+
// Find a pool that supports kernel args. This is just normal system memory.
187+
// It will be used for commands and input data.
188+
r = hsa_amd_agent_iterate_memory_pools(
189+
aie_agent, get_coarse_global_kernarg_mem_pool, &global_kernarg_mem_pool);
190+
assert(r == HSA_STATUS_SUCCESS);
191+
assert(global_kernarg_mem_pool.handle);
192+
193+
// Load the DPU and PDI files into a global pool that doesn't support kernel
194+
// args (DEV BO).
195+
load_dpu_file(global_dev_mem_pool, dpu_inst_file_name,
196+
reinterpret_cast<void **>(&dpu_inst_buf));
197+
uint32_t dpu_handle = 0;
198+
r = hsa_amd_get_handle_from_vaddr(dpu_inst_buf, &dpu_handle);
199+
assert(r == HSA_STATUS_SUCCESS);
200+
assert(dpu_handle != 0);
201+
202+
load_pdi_file(global_dev_mem_pool, pdi_file_name,
203+
reinterpret_cast<void **>(&pdi_buf));
204+
uint32_t pdi_handle = 0;
205+
r = hsa_amd_get_handle_from_vaddr(pdi_buf, &pdi_handle);
206+
assert(r == HSA_STATUS_SUCCESS);
207+
assert(pdi_handle != 0);
208+
209+
hsa_amd_aie_ert_hw_ctx_cu_config_t cu_config{.cu_config_bo = pdi_handle,
210+
.cu_func = 0};
211+
212+
hsa_amd_aie_ert_hw_ctx_config_cu_param_t config_cu_args{
213+
.num_cus = 1, .cu_configs = &cu_config};
214+
215+
// Configure the queue's hardware context.
216+
r = hsa_amd_queue_hw_ctx_config(
217+
aie_queue, HSA_AMD_QUEUE_AIE_ERT_HW_CXT_CONFIG_CU, &config_cu_args);
218+
assert(r == HSA_STATUS_SUCCESS);
219+
220+
// create inputs / outputs
221+
constexpr std::size_t num_data_elements = 1024;
222+
constexpr std::size_t data_buffer_size =
223+
num_data_elements * sizeof(std::uint32_t);
224+
225+
std::uint32_t *input = {};
226+
r = hsa_amd_memory_pool_allocate(global_dev_mem_pool, data_buffer_size, 0,
227+
reinterpret_cast<void **>(&input));
228+
assert(r == HSA_STATUS_SUCCESS);
229+
std::uint32_t input_handle = {};
230+
r = hsa_amd_get_handle_from_vaddr(input, &input_handle);
231+
assert(r == HSA_STATUS_SUCCESS);
232+
assert(input_handle != 0);
233+
234+
std::uint32_t *output = {};
235+
r = hsa_amd_memory_pool_allocate(global_dev_mem_pool, data_buffer_size, 0,
236+
reinterpret_cast<void **>(&output));
237+
assert(r == HSA_STATUS_SUCCESS);
238+
std::uint32_t output_handle = {};
239+
r = hsa_amd_get_handle_from_vaddr(output, &output_handle);
240+
assert(r == HSA_STATUS_SUCCESS);
241+
assert(output_handle != 0);
242+
243+
for (std::size_t i = 0; i < num_data_elements; i++) {
244+
*(input + i) = i;
245+
*(output + i) = 0xDEFACE;
246+
}
247+
248+
///////////////////////////////////// Creating the cmd packet
249+
// Creating a packet to store the command
250+
hsa_amd_aie_ert_packet_t *cmd_pkt = NULL;
251+
r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, 64, 0,
252+
reinterpret_cast<void **>(&cmd_pkt));
253+
assert(r == HSA_STATUS_SUCCESS);
254+
cmd_pkt->state = HSA_AMD_AIE_ERT_STATE_NEW;
255+
cmd_pkt->count = 0xA; // # of arguments to put in command
256+
cmd_pkt->opcode = HSA_AMD_AIE_ERT_START_CU;
257+
cmd_pkt->header.AmdFormat = HSA_AMD_PACKET_TYPE_AIE_ERT;
258+
cmd_pkt->header.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC
259+
<< HSA_PACKET_HEADER_TYPE;
260+
261+
// Creating the payload for the packet
262+
hsa_amd_aie_ert_start_kernel_data_t *cmd_payload = NULL;
263+
uint32_t cmd_handle;
264+
r = hsa_amd_get_handle_from_vaddr(reinterpret_cast<void *>(cmd_pkt),
265+
&cmd_handle);
266+
assert(r == HSA_STATUS_SUCCESS);
267+
r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, 64, 0,
268+
reinterpret_cast<void **>(&cmd_payload));
269+
assert(r == HSA_STATUS_SUCCESS);
270+
cmd_payload->cu_mask = 0x1; // Selecting the PDI to use with this command
271+
cmd_payload->data[0] = 0x3; // Transaction opcode
272+
cmd_payload->data[1] = 0x0;
273+
cmd_payload->data[2] = dpu_handle;
274+
cmd_payload->data[3] = 0x0;
275+
cmd_payload->data[4] = 0x44; // Size of DPU instruction
276+
cmd_payload->data[5] = input_handle;
277+
cmd_payload->data[6] = 0;
278+
cmd_payload->data[7] = output_handle;
279+
cmd_payload->data[8] = 0;
280+
cmd_pkt->payload_data = reinterpret_cast<uint64_t>(cmd_payload);
281+
282+
uint64_t wr_idx = hsa_queue_add_write_index_relaxed(aie_queue, 1);
283+
uint64_t packet_id = wr_idx % aie_queue->size;
284+
reinterpret_cast<hsa_amd_aie_ert_packet_t *>(
285+
aie_queue->base_address)[packet_id] = *cmd_pkt;
286+
hsa_signal_store_screlease(aie_queue->doorbell_signal, wr_idx);
287+
288+
for (std::size_t i = 0; i < num_data_elements; i++) {
289+
const auto expected = *(input + i) + 1;
290+
const auto result = *(output + i);
291+
assert(result == expected);
292+
}
293+
294+
r = hsa_queue_destroy(aie_queue);
295+
assert(r == HSA_STATUS_SUCCESS);
296+
297+
r = hsa_amd_memory_pool_free(output);
298+
assert(r == HSA_STATUS_SUCCESS);
299+
r = hsa_amd_memory_pool_free(input);
300+
assert(r == HSA_STATUS_SUCCESS);
301+
r = hsa_amd_memory_pool_free(pdi_buf);
302+
assert(r == HSA_STATUS_SUCCESS);
303+
r = hsa_amd_memory_pool_free(dpu_inst_buf);
304+
assert(r == HSA_STATUS_SUCCESS);
305+
306+
r = hsa_shut_down();
307+
assert(r == HSA_STATUS_SUCCESS);
308+
std::cout << "PASS\n";
309+
}

‎rocrtst/suites/aie/amdxdna_accel.h

+569
Large diffs are not rendered by default.

‎rocrtst/suites/aie/hsa_ipu.h

+271
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
// Copyright 2024 The IREE Authors
2+
//
3+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#pragma once
7+
8+
#include <fcntl.h>
9+
#include <sys/ioctl.h>
10+
#include <sys/mman.h>
11+
#include <sys/stat.h>
12+
#include <unistd.h>
13+
14+
#include <cerrno>
15+
#include <cstdint>
16+
#include <cstdio>
17+
#include <cstdlib>
18+
#include <cstring>
19+
#include <ctime>
20+
21+
#include "amdxdna_accel.h"
22+
23+
// want to mmap the file
24+
25+
#define MAX_NUM_INSTRUCTIONS 1024 // Maximum number of dpu or pdi instructions.
26+
27+
// Dummy packet defines
28+
29+
// Maps one 4096-byte page at offset 0 of `fd` as shared read/write memory and
// stores the resulting address in `*doorbell`.
//
// Returns 0 on success. On failure returns the errno value set by mmap() and
// `*doorbell` holds the value of MAP_FAILED.
int map_doorbell(int fd, uint64_t *doorbell) {
  // Mmap the mailbox.
  int32_t page_size = 4096;
  void *mapping =
      mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  *doorbell = (uint64_t)mapping;

  // The mmap result itself must be tested; the out-parameter pointer is
  // never equal to MAP_FAILED.
  if (mapping == MAP_FAILED) {
    // Capture errno first: printf may overwrite it.
    const int err = errno;
    printf("[ERROR] doorbell mmap failed: %s\n", strerror(err));
    return err;
  }

  printf("Doorbell mapped\n");
  return 0;
}
42+
43+
// Reads the 32-bit value stored at address `doorbell` and writes it back
// increased by 0x94.
void ring_doorbell(uint64_t doorbell) {
  const int32_t tail = *reinterpret_cast<int32_t *>(doorbell);
  uint32_t *const cell = reinterpret_cast<uint32_t *>(doorbell);
  *cell = tail + 0x94;
}
47+
48+
int get_driver_version(int fd, __u32 *major, __u32 *minor) {
49+
int ret;
50+
amdxdna_drm_query_aie_version version;
51+
52+
amdxdna_drm_get_info info_params = {
53+
.param = DRM_AMDXDNA_QUERY_AIE_VERSION,
54+
.buffer_size = sizeof(version),
55+
.buffer = (__u64)&version,
56+
};
57+
58+
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_GET_INFO, &info_params);
59+
if (ret == 0) {
60+
*major = version.major;
61+
*minor = version.minor;
62+
}
63+
64+
return ret;
65+
}
66+
67+
/*
68+
Allocates a heap on the device by creating a BO of type dev heap
69+
*/
70+
int alloc_heap(int fd, __u32 size, __u32 *handle) {
71+
int ret;
72+
void *heap_buf = NULL;
73+
const size_t alignment = 64 * 1024 * 1024;
74+
ret = posix_memalign(&heap_buf, alignment, size);
75+
if (ret != 0 || heap_buf == NULL) {
76+
printf("[ERROR] Failed to allocate heap buffer of size %d\n", size);
77+
}
78+
79+
void *dev_heap_parent = mmap(0, alignment * 2 - 1, PROT_READ | PROT_WRITE,
80+
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
81+
82+
if (dev_heap_parent == MAP_FAILED) {
83+
dev_heap_parent = nullptr;
84+
return -1;
85+
}
86+
87+
amdxdna_drm_create_bo create_bo_params = {
88+
.type = AMDXDNA_BO_DEV_HEAP,
89+
.size = size,
90+
};
91+
92+
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_bo_params);
93+
if (ret == 0 && handle) {
94+
*handle = create_bo_params.handle;
95+
}
96+
97+
amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo_params.handle};
98+
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info);
99+
if (ret != 0) {
100+
perror("Failed to get BO info");
101+
return -2;
102+
}
103+
104+
// Need to free the heap buf but still use the address so we can
105+
// ensure alignment
106+
free(heap_buf);
107+
heap_buf = (void *)mmap(heap_buf, size, PROT_READ | PROT_WRITE, MAP_SHARED,
108+
fd, get_bo_info.map_offset);
109+
printf("Heap buffer @: %p\n", heap_buf);
110+
111+
return ret;
112+
}
113+
114+
/*
115+
Creates a dev bo which is carved out of the heap bo.
116+
*/
117+
int create_dev_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr, __u32 *handle,
118+
__u64 size_in_bytes) {
119+
amdxdna_drm_create_bo create_bo = {
120+
.type = AMDXDNA_BO_DEV,
121+
.size = size_in_bytes,
122+
};
123+
int ret = ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_bo);
124+
if (ret != 0) {
125+
perror("Failed to create BO");
126+
return -1;
127+
}
128+
129+
amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo.handle};
130+
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info);
131+
if (ret != 0) {
132+
perror("Failed to get BO info");
133+
return -2;
134+
}
135+
136+
*vaddr = get_bo_info.vaddr;
137+
*sram_vaddr = get_bo_info.xdna_addr;
138+
*handle = create_bo.handle;
139+
return 0;
140+
}
141+
142+
/*
143+
Creates a shmem bo
144+
*/
145+
int create_shmem_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,
146+
__u32 *handle, __u64 size_in_bytes) {
147+
const size_t alignment = 64 * 1024 * 1024;
148+
void *shmem_create = NULL;
149+
int ret = posix_memalign(&shmem_create, alignment, size_in_bytes);
150+
if (ret != 0) {
151+
printf("[ERROR] Failed to allocate shmem bo of size %lld\n", size_in_bytes);
152+
}
153+
154+
// Touching buffer to map page
155+
*(uint32_t *)shmem_create = 0xDEADBEEF;
156+
157+
printf("Shmem BO @: %p\n", shmem_create);
158+
159+
amdxdna_drm_create_bo create_bo = {.type = AMDXDNA_BO_SHMEM,
160+
.vaddr = (__u64)shmem_create,
161+
.size = size_in_bytes};
162+
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_bo);
163+
if (ret != 0) {
164+
perror("Failed to create BO");
165+
return -1;
166+
}
167+
168+
amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo.handle};
169+
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info);
170+
if (ret != 0) {
171+
perror("Failed to get BO info");
172+
return -2;
173+
}
174+
175+
*vaddr = (__u64)shmem_create;
176+
*sram_vaddr = get_bo_info.xdna_addr;
177+
*handle = create_bo.handle;
178+
return 0;
179+
}
180+
181+
/*
182+
Wrapper around synch bo ioctl.
183+
*/
184+
int sync_bo(int fd, __u32 handle) {
185+
amdxdna_drm_sync_bo sync_params = {.handle = handle};
186+
int ret = ioctl(fd, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync_params);
187+
if (ret != 0) {
188+
printf("Synch bo ioctl failed for handle %d\n", handle);
189+
}
190+
return ret;
191+
}
192+
193+
/*
194+
Create a BO_DEV and populate it with a PDI
195+
*/
196+
197+
int load_pdi(int fd, uint64_t *vaddr, uint64_t *sram_addr, __u32 *handle,
198+
const char *path) {
199+
FILE *file = fopen(path, "r");
200+
if (file == NULL) {
201+
perror("Failed to open instructions file.");
202+
return -1;
203+
}
204+
205+
fseek(file, 0L, SEEK_END);
206+
ssize_t file_size = ftell(file);
207+
fseek(file, 0L, SEEK_SET);
208+
209+
printf("Pdi file size: %ld\n", file_size);
210+
211+
fclose(file);
212+
213+
// Mmaping the file
214+
int pdi_fd = open(path, O_RDONLY);
215+
uint64_t *file_data =
216+
(uint64_t *)mmap(0, file_size, PROT_READ, MAP_PRIVATE, pdi_fd, 0);
217+
218+
// Creating a BO_DEV bo to store the pdi file.
219+
int ret = create_dev_bo(fd, vaddr, sram_addr, handle, file_size);
220+
if (ret != 0) {
221+
perror("Failed to create pdi BO");
222+
return -1;
223+
}
224+
225+
// copy the file into Bo dev
226+
uint64_t *bo = (uint64_t *)*vaddr;
227+
memcpy(bo, file_data, file_size);
228+
229+
close(pdi_fd);
230+
return 0;
231+
}
232+
233+
/*
234+
Create a BO DEV and populate it with instructions whose virtual address is
235+
passed to the driver via an HSA packet.
236+
*/
237+
int load_instructions(int fd, uint64_t *vaddr, uint64_t *sram_addr,
238+
__u32 *handle, const char *path, __u32 *num_inst) {
239+
// read dpu instructions into an array
240+
FILE *file = fopen(path, "r");
241+
if (file == NULL) {
242+
perror("Failed to open instructions file.");
243+
return -1;
244+
}
245+
246+
char *line = NULL;
247+
size_t len = 0;
248+
__u32 inst_array[MAX_NUM_INSTRUCTIONS];
249+
__u32 inst_counter = 0;
250+
while (getline(&line, &len, file) != -1) {
251+
inst_array[inst_counter++] = strtoul(line, NULL, 16);
252+
if (inst_counter >= MAX_NUM_INSTRUCTIONS) {
253+
perror("Instruction array overflowed.");
254+
return -2;
255+
}
256+
}
257+
fclose(file);
258+
259+
// Creating a BO_DEV bo to store the instruction.
260+
int ret =
261+
create_dev_bo(fd, vaddr, sram_addr, handle, inst_counter * sizeof(__u32));
262+
if (ret != 0) {
263+
perror("Failed to create dpu BO");
264+
return -3;
265+
}
266+
267+
*num_inst = inst_counter;
268+
269+
memcpy((__u32 *)*vaddr, inst_array, inst_counter * sizeof(__u32));
270+
return ret;
271+
}

0 commit comments

Comments
 (0)
This repository has been archived.