Skip to content

Commit 66b78a7

Browse files
Add placeholder operator (#239)
Co-authored-by: Noli Gerawork <[email protected]>
1 parent f7c6867 commit 66b78a7

31 files changed

+887
-460
lines changed

ark/api/executor.cpp

+124-75
Large diffs are not rendered by default.

ark/api/tensor.cpp

-12
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,6 @@
99

1010
namespace ark {
1111

12-
Tensor::Tensor(void* data_ptr, int32_t device_id,
13-
const std::vector<int64_t>& shape, const DataType& dtype) {
14-
size_t external_data_size = std::accumulate(shape.begin(), shape.end(), 1,
15-
std::multiplies<int64_t>()) *
16-
dtype.bytes();
17-
auto buffer =
18-
std::make_shared<ModelBuffer>(data_ptr, external_data_size, device_id);
19-
auto tensor = std::make_shared<ModelTensor>(
20-
dtype.ref(), buffer, Dims(shape), Dims(shape), Dims(), Dims());
21-
ref_ = tensor;
22-
}
23-
2412
size_t Tensor::id() const {
2513
if (ref_) {
2614
return ref_->id();

ark/codegen.cpp

+29-31
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
#include "codegen.hpp"
55

66
#include <list>
7+
#include <utility>
78

89
#include "ark/data_type.hpp"
910
#include "env.h"
11+
#include "external_buffer_registry.hpp"
1012
#include "file_io.h"
1113
#include "logging.hpp"
1214
#include "model/model_buffer.hpp"
1315
#include "model/model_data_type.hpp"
1416
#include "model/model_op.hpp"
1517
#include "model/model_tensor.hpp"
16-
#include "model_buffer_manager.hpp"
1718
#include "range.hpp"
1819
#include "utils/utils_math.hpp"
1920

@@ -55,8 +56,8 @@ class CodeGenerator::Impl {
5556
public:
5657
Impl(const PlanJson &plan,
5758
const std::map<size_t, size_t> &buffer_id_to_offset,
58-
const std::vector<std::string> &external_args,
59-
const std::map<size_t, std::string> &buffer_id_to_name,
59+
const std::map<size_t, std::pair<std::string, void *>>
60+
&buffer_id_to_kernel_arg,
6061
const std::string &name);
6162
~Impl() = default;
6263

@@ -82,8 +83,7 @@ class CodeGenerator::Impl {
8283
friend class CodeGenerator;
8384

8485
std::map<size_t, size_t> buffer_id_to_offset_;
85-
std::vector<std::string> external_args_;
86-
std::map<size_t, std::string> buffer_id_to_name_;
86+
std::map<size_t, std::pair<std::string, void *>> buffer_id_to_kernel_arg_;
8787
std::string name_;
8888
int rank_;
8989
int world_size_;
@@ -92,14 +92,13 @@ class CodeGenerator::Impl {
9292
std::string code_;
9393
};
9494

95-
CodeGenerator::Impl::Impl(
96-
const PlanJson &plan, const std::map<size_t, size_t> &buffer_id_to_offset,
97-
const std::vector<std::string> &external_args,
98-
const std::map<size_t, std::string> &buffer_id_to_name,
99-
const std::string &name)
95+
CodeGenerator::Impl::Impl(const PlanJson &plan,
96+
const std::map<size_t, size_t> &buffer_id_to_offset,
97+
const std::map<size_t, std::pair<std::string, void *>>
98+
&buffer_id_to_kernel_arg,
99+
const std::string &name)
100100
: buffer_id_to_offset_(buffer_id_to_offset),
101-
external_args_(external_args),
102-
buffer_id_to_name_(buffer_id_to_name),
101+
buffer_id_to_kernel_arg_(buffer_id_to_kernel_arg),
103102
name_(name) {
104103
rank_ = plan.at("Rank");
105104
world_size_ = plan.at("WorldSize");
@@ -192,9 +191,10 @@ CodeGenerator::Impl::Impl(
192191

193192
// Generate the global arguments
194193
std::stringstream global_args_ss, function_args_ss, arg_types_ss;
195-
for (const auto &arg : external_args_) {
196-
global_args_ss << "void *" << arg << ", ";
197-
function_args_ss << arg << ", ";
194+
for (const auto &[buf_id, kernel_arg] : buffer_id_to_kernel_arg_) {
195+
const auto &arg_name = kernel_arg.first;
196+
global_args_ss << "void *" << arg_name << ", ";
197+
function_args_ss << arg_name << ", ";
198198
arg_types_ss << "void *, ";
199199
}
200200
std::string global_args = global_args_ss.str();
@@ -219,7 +219,7 @@ CodeGenerator::Impl::Impl(
219219
{"@NUM_WARPS_PER_BLOCK@", std::to_string(num_warps_per_proc_)},
220220
{"@DEFINITIONS@", definitions_ss.str()},
221221
{"@BODY@", body_ss.str()},
222-
{"@NAME@", (name_.empty() ? "" : "_" + name_)},
222+
{"@NAME@", (!name_.empty() ? name_ : "")},
223223
{"@GLOBAL_ARGS@", global_args},
224224
{"@FUNCTION_ARGS@", function_args},
225225
{"@ARG_TYPES@", arg_types},
@@ -273,29 +273,28 @@ std::string CodeGenerator::Impl::def_task(const Json &task_json) {
273273
if (arg.type_name() == "TENSOR") {
274274
auto tns = arg.value<ModelTensorRef>();
275275
size_t buffer_id = tns->buffer()->id();
276-
if (buffer_id_to_name_.find(buffer_id) ==
277-
buffer_id_to_name_.end()) {
276+
auto it = buffer_id_to_kernel_arg_.find(buffer_id);
277+
if (it == buffer_id_to_kernel_arg_.end()) {
278278
size_t buffer_offset = buffer_id_to_offset_.at(buffer_id);
279279
size_t offset = buffer_offset + ModelOffset(tns).value();
280280
ss << "(" << tns->data_type()->type_str() << "*)&_buf["
281281
<< offset << "]";
282282
} else {
283-
ss << "(" << tns->data_type()->type_str() << "*)"
284-
<< buffer_id_to_name_.at(buffer_id);
283+
const auto &name = it->second.first;
284+
ss << "(" << tns->data_type()->type_str() << "*)" << name;
285285
}
286286
} else if (arg.type_name() == "OFFSET") {
287287
auto moff = arg.value<ModelOffset>();
288288
size_t buffer_id = moff.buffer_id();
289-
if (buffer_id_to_name_.find(buffer_id) ==
290-
buffer_id_to_name_.end()) {
289+
auto it = buffer_id_to_kernel_arg_.find(buffer_id);
290+
if (it == buffer_id_to_kernel_arg_.end()) {
291291
size_t buffer_offset = buffer_id_to_offset_.at(buffer_id);
292292
size_t offset = buffer_offset + moff.value();
293293
ss << offset;
294294
} else {
295-
const std::string &buffer_name =
296-
buffer_id_to_name_.at(buffer_id);
295+
const auto &name = it->second.first;
297296
size_t offset = moff.value();
298-
ss << "(uint64_t)((char*)" << buffer_name << " + " << offset
297+
ss << "(uint64_t)((char*)" << name << " + " << offset
299298
<< ")";
300299
}
301300
} else {
@@ -372,8 +371,7 @@ std::string CodeGenerator::Impl::resource_group(
372371
n_slots = total_warps / num_warps_per_task;
373372
}
374373
if (n_slots == 0) {
375-
ERR(PlanError, "not enough resources for task group: ",
376-
tg.dump());
374+
ERR(PlanError, "not enough resources for task group: ", tg.dump());
377375
}
378376

379377
size_t task_b = *task_range.begin();
@@ -498,11 +496,11 @@ std::string CodeGenerator::Impl::sync_process_range(const Range<size_t> &range,
498496

499497
CodeGenerator::CodeGenerator(
500498
const PlanJson &plan, const std::map<size_t, size_t> &buffer_id_to_offset,
501-
const std::vector<std::string> &external_args,
502-
const std::map<size_t, std::string> &buffer_id_to_name,
499+
const std::map<size_t, std::pair<std::string, void *>>
500+
&buffer_id_to_kernel_arg,
503501
const std::string &name)
504-
: impl_(std::make_shared<Impl>(plan, buffer_id_to_offset, external_args,
505-
buffer_id_to_name, name)) {}
502+
: impl_(std::make_shared<Impl>(plan, buffer_id_to_offset,
503+
buffer_id_to_kernel_arg, name)) {}
506504

507505
std::string CodeGenerator::code() const { return impl_->code_; }
508506

ark/codegen.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,18 @@
77
#include <map>
88
#include <memory>
99
#include <string>
10+
#include <utility>
1011

1112
#include "model/model_json.hpp"
12-
#include "model_buffer_manager.hpp"
1313

1414
namespace ark {
1515

1616
class CodeGenerator {
1717
public:
1818
CodeGenerator(const PlanJson &plan,
1919
const std::map<size_t, size_t> &buffer_id_to_offset,
20-
const std::vector<std::string> &external_args,
21-
const std::map<size_t, std::string> &buffer_id_to_name,
20+
const std::map<size_t, std::pair<std::string, void *>>
21+
&buffer_id_to_kernel_arg,
2222
const std::string &name = "ark_kernel");
2323

2424
~CodeGenerator() = default;

ark/cpu_timer.cpp

-16
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,4 @@ double cpu_timer(void) {
1616
return (tspec.tv_nsec / 1.0e9) + tspec.tv_sec;
1717
}
1818

19-
// Sleep in second.
20-
int cpu_timer_sleep(double sec) {
21-
struct timespec tspec;
22-
tspec.tv_sec = (time_t)sec;
23-
tspec.tv_nsec = (long)((sec - tspec.tv_sec) * 1.0e9);
24-
return nanosleep(&tspec, 0);
25-
}
26-
27-
// Sleep in nanosecond.
28-
int cpu_ntimer_sleep(long nsec) {
29-
struct timespec tspec;
30-
tspec.tv_sec = 0;
31-
tspec.tv_nsec = nsec;
32-
return nanosleep(&tspec, 0);
33-
}
34-
3519
} // namespace ark

ark/cpu_timer.h

-4
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@ namespace ark {
88

99
// Measure current time in second.
1010
double cpu_timer(void);
11-
// Sleep in second.
12-
int cpu_timer_sleep(double sec);
13-
// Sleep in nanosecond.
14-
int cpu_ntimer_sleep(long nsec);
1511

1612
} // namespace ark
1713

ark/external_buffer_registry.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
3+
4+
#include "external_buffer_registry.hpp"
5+
6+
#include "logging.hpp"
7+
8+
namespace ark {
9+
10+
ExternalBufferRegistry &ExternalBufferRegistry::get_instance() {
11+
static ExternalBufferRegistry instance;
12+
return instance;
13+
}
14+
15+
void ExternalBufferRegistry::set(const size_t id, void *data) {
16+
buffers_[id] = data;
17+
}
18+
19+
void *ExternalBufferRegistry::get(const size_t id) const {
20+
auto it = buffers_.find(id);
21+
if (it != buffers_.end()) {
22+
return it->second;
23+
}
24+
return nullptr;
25+
}
26+
27+
void ExternalBufferRegistry::clear() { buffers_.clear(); }
28+
29+
} // namespace ark

ark/external_buffer_registry.hpp

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT license.
3+
4+
#ifndef ARK_EXTERNAL_BUFFER_REGISTRY_HPP_
5+
#define ARK_EXTERNAL_BUFFER_REGISTRY_HPP_
6+
7+
#include <unordered_map>
8+
9+
namespace ark {
10+
// Manages externally allocated buffers (buffers corresponding to Tensors that
11+
// are the output of a `placeholder` operation) outside of ARK's memory space.
12+
class ExternalBufferRegistry {
13+
public:
14+
static ExternalBufferRegistry &get_instance();
15+
16+
void set(const size_t id, void *data);
17+
18+
void *get(const size_t id) const;
19+
20+
void clear();
21+
22+
private:
23+
// Maps buffer IDs to pointers and sizes.
24+
std::unordered_map<size_t, void *> buffers_;
25+
ExternalBufferRegistry() {}
26+
ExternalBufferRegistry(const ExternalBufferRegistry &) = delete;
27+
ExternalBufferRegistry &operator=(const ExternalBufferRegistry &) = delete;
28+
};
29+
} // namespace ark
30+
31+
#endif // ARK_EXTERNAL_BUFFER_REGISTRY_HPP_

ark/gpu/gpu.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ ARK_GPU_DEFINE_TYPE_ALIAS(gpuModule, CUmodule, hipModule_t);
5353
ARK_GPU_DEFINE_TYPE_ALIAS(gpuFunction, CUfunction, hipFunction_t);
5454
ARK_GPU_DEFINE_TYPE_ALIAS(gpuFunctionAttribute, CUfunction_attribute,
5555
hipFunction_attribute);
56+
ARK_GPU_DEFINE_TYPE_ALIAS(gpuPointerAttributes, cudaPointerAttributes,
57+
hipPointerAttributes);
5658

5759
// runtime API
5860
ARK_GPU_DEFINE_CONSTANT_ALIAS(gpuSuccess, cudaSuccess, hipSuccess);
@@ -126,6 +128,8 @@ ARK_GPU_DEFINE_CONSTANT_ALIAS(gpuPointerAttributeSyncMemops,
126128
ARK_GPU_DEFINE_FUNC_ALIAS(gpuGetErrorString, cudaGetErrorString,
127129
hipGetErrorString);
128130
ARK_GPU_DEFINE_FUNC_ALIAS(gpuGetLastError, cudaGetLastError, hipGetLastError);
131+
ARK_GPU_DEFINE_FUNC_ALIAS(gpuPointerGetAttributes, cudaPointerGetAttributes,
132+
hipPointerGetAttributes);
129133
ARK_GPU_DEFINE_FUNC_ALIAS(gpuDeviceGetAttribute, cudaDeviceGetAttribute,
130134
hipDeviceGetAttribute);
131135
ARK_GPU_DEFINE_FUNC_ALIAS(gpuDeviceSynchronize, cudaDeviceSynchronize,

ark/include/ark/executor.hpp

+14-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <ark/tensor.hpp>
1010
#include <memory>
1111
#include <string>
12+
#include <unordered_map>
1213
#include <vector>
1314

1415
namespace ark {
@@ -45,10 +46,14 @@ class Executor {
4546
const std::string &name = "executor");
4647

4748
/// Launch the executor. This must be called after `compile()`.
48-
void launch(Stream stream = nullptr, bool loop_mode = true);
49+
void launch(
50+
Stream stream = nullptr, bool loop_mode = true,
51+
const std::unordered_map<Tensor, void *> &placeholder_data = {});
4952

5053
/// Run the executor for `iter` iterations.
51-
void run(int iter);
54+
void run(
55+
int iter,
56+
const std::unordered_map<Tensor, void *> &placeholder_data = {});
5257

5358
/// Wait for the previous run to finish.
5459
void wait(int64_t max_spin_count = -1);
@@ -99,13 +104,15 @@ class Model;
99104

100105
class DefaultExecutor : public Executor {
101106
public:
102-
DefaultExecutor(
103-
const Model &model, int device_id = -1, Stream stream = nullptr,
104-
const std::vector<Planner::ConfigRule> &config_rules = {},
105-
const std::string &name = "DefaultExecutor", bool loop_mode = true);
107+
DefaultExecutor(const Model &model, int device_id = -1,
108+
Stream stream = nullptr,
109+
const std::vector<Planner::ConfigRule> &config_rules = {},
110+
const std::string &name = "DefaultExecutor",
111+
bool loop_mode = true);
106112

107113
/// Launch the default executor.
108-
void launch();
114+
void launch(
115+
const std::unordered_map<Tensor, void *> &placeholder_data = {});
109116
};
110117

111118
} // namespace ark

ark/include/ark/model.hpp

+31-1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,37 @@ class Model : public ModelGraph {
7676
const Dims &padded_shape = {}, int rank = -1,
7777
const std::string &name = "");
7878

79+
///
80+
/// Returns a tensor object associated with an external buffer.
81+
///
82+
/// @param shape Shape of the tensor, where the data of interest is.
83+
/// @param dtype Type of the tensor data.
84+
/// @param strides Strides of each dimension of the tensor, which may be
85+
/// different from the shape. @p strides can be considered as the actual
86+
/// shape of the underlying data buffer.
87+
/// @param offsets Offsets of the tensor. The data of interest starts at
88+
/// @p offsets and ends at @p offsets + @p padded_shape.
89+
/// @param padded_shape Padded shape of the tensor. Padding is used to
90+
/// reserve extra space for the tensor when computation requires it.
91+
/// Data on the padded region is allowed to be accessed by computation,
92+
/// but it is not considered as the data of interest. The padded region is
93+
/// initialized to zero only once when the Executor is launched. The padded
94+
/// shape should be greater than or equal to the @p shape, and the
95+
/// @p strides should be greater than or equal to the padded shape. If the
96+
/// @p strides are not provided, they are set to the padded shape. If the
97+
/// padded shape is not provided, it is set to the @p shape.
98+
/// @param rank Rank of the tensor. -1 means the rank of this model.
99+
/// @param name Name of the tensor.
100+
/// @param data Address of data to pass through placeholder. If provided,
101+
/// this buffer is registered with the ExternalBufferRegistry and associated
102+
/// with the tensor.
103+
/// @return Pointer to a tensor object that references the external buffer.
104+
///
105+
Tensor placeholder(const Dims &shape, const DataType &data_type,
106+
const Dims &strides = {}, const Dims &offsets = {},
107+
const Dims &padded_shape = {}, int rank = -1,
108+
void *data = nullptr, const std::string &name = "");
109+
79110
Tensor refer(Tensor input, const Dims &shape = {}, const Dims &strides = {},
80111
const Dims &offsets = {}, const Dims &padded_shape = {},
81112
const std::string &name = "");
@@ -254,7 +285,6 @@ class Model : public ModelGraph {
254285

255286
Tensor local_all_reduce(Tensor input, int gpu_id, int gpu_num,
256287
const std::string &name = "");
257-
258288
};
259289

260290
} // namespace ark

0 commit comments

Comments (0)