Skip to content

Commit 783b38b

Browse files
authored
GC Reloaded (#75)
* GC reloaded: letting MLIR deallocate while making sure Python does not interfere
* get the order of delivery callbacks right by using a single vector to keep data in DepManager
* create an extra function for deletions so they do not interfere with JIT caching
1 parent 65ad19a commit 783b38b

26 files changed

+598
-458
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# See https://pre-commit.com/hooks.html for more hooks
33
repos:
44
- repo: https://github.com/pre-commit/pre-commit-hooks
5-
rev: v4.4.0
5+
rev: v4.5.0
66
hooks:
77
- id: end-of-file-fixer
88
- id: trailing-whitespace
@@ -12,7 +12,7 @@ repos:
1212
- id: clang-format
1313
args: ["-i"]
1414
- repo: https://github.com/psf/black
15-
rev: 23.3.0
15+
rev: 23.12.0
1616
hooks:
1717
- id: black
1818
language_version: python3

CMakeLists.txt

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,8 @@ set(SHARPYSrcs
107107
${PROJECT_SOURCE_DIR}/src/ReduceOp.cpp
108108
${PROJECT_SOURCE_DIR}/src/SetGetItem.cpp
109109
${PROJECT_SOURCE_DIR}/src/jit/mlir.cpp
110-
${PROJECT_SOURCE_DIR}/src/Service.cpp
111110
${PROJECT_SOURCE_DIR}/src/Deferred.cpp
112-
)
113-
set(RTSrcs
111+
${PROJECT_SOURCE_DIR}/src/Service.cpp
114112
${PROJECT_SOURCE_DIR}/src/Mediator.cpp
115113
${PROJECT_SOURCE_DIR}/src/MPIMediator.cpp
116114
${PROJECT_SOURCE_DIR}/src/CollComm.cpp
@@ -126,9 +124,8 @@ set(IDTRSrcs
126124
)
127125

128126
pybind11_add_module(_sharpy MODULE ${SHARPYSrcs} ${Hpps})
129-
add_library(_sharpy_rt SHARED ${RTSrcs} ${Hpps})
130127
add_library(idtr SHARED ${IDTRSrcs} ${Hpps})
131-
set(AllTargets _sharpy _sharpy_rt idtr)
128+
set(AllTargets _sharpy idtr)
132129

133130
add_compile_definitions(USE_MKL=1)
134131
add_compile_options("-ftemplate-backtrace-limit=0")
@@ -144,7 +141,6 @@ include_directories(
144141

145142
if (CMAKE_SYSTEM_NAME STREQUAL Linux)
146143
target_link_options(_sharpy PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/export.txt")
147-
target_link_options(_sharpy_rt PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/export-sharpy_rt.txt")
148144
# target_link_options(idtr PRIVATE "LINKER:-fvisibility=hidden" "LINKER:--exclude-libs,All")
149145
endif()
150146

@@ -158,7 +154,6 @@ get_property(imex_conversion_libs GLOBAL PROPERTY IMEX_CONVERSION_LIBS)
158154

159155
#llvm_update_compile_flags(_sharpy)
160156
target_link_directories(_sharpy PRIVATE ${CONDA_PREFIX}/lib)
161-
target_link_directories(_sharpy_rt PRIVATE ${CONDA_PREFIX}/lib) # ${IMEX_INSTALL_PREFIX}/lib)
162157
target_link_directories(idtr PRIVATE ${CONDA_PREFIX}/lib)
163158

164159
target_link_libraries(_sharpy PRIVATE
@@ -176,15 +171,10 @@ target_link_libraries(_sharpy PRIVATE
176171
IMEXUtil
177172
LLVMX86CodeGen
178173
LLVMX86AsmParser
179-
_sharpy_rt
180174
idtr
181-
)
182-
target_link_libraries(idtr PRIVATE
183-
${MPI_C_LIBRARIES}
184-
# ${MKL_LIBRARIES}
185175
tbb
186176
)
187-
target_link_libraries(_sharpy_rt PRIVATE
177+
target_link_libraries(idtr PRIVATE
188178
${MPI_C_LIBRARIES}
189179
# ${MKL_LIBRARIES}
190180
tbb

imex_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ea310778459a22ff8889a3c5cf3aba39a6e9e772
1+
a84e52b9c5074a71f9b935cf7f0c0384b462f3bd

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def build_cmake(self, ext):
4040
build_args = [
4141
"--config",
4242
config,
43-
"-j8"
43+
"-j4"
4444
# '--', '-j4'
4545
]
4646

src/Creator.cpp

Lines changed: 57 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
*/
44

55
#include "sharpy/Creator.hpp"
6-
#include "sharpy/NDArray.hpp"
76
#include "sharpy/Deferred.hpp"
87
#include "sharpy/Factory.hpp"
8+
#include "sharpy/NDArray.hpp"
99
#include "sharpy/Transceiver.hpp"
1010
#include "sharpy/TypeDispatch.hpp"
1111
#include "sharpy/jit/mlir.hpp"
@@ -82,12 +82,11 @@ struct DeferredFull : public Deferred {
8282
const intptr_t *r_strides, uint64_t *lo_allocated,
8383
uint64_t *lo_aligned) {
8484
assert(rank == this->rank());
85-
this->set_value(std::move(
86-
mk_tnsr(reinterpret_cast<Transceiver *>(this->team()), _dtype,
87-
this->shape(), l_allocated, l_aligned, l_offset, l_sizes,
88-
l_strides, o_allocated, o_aligned, o_offset, o_sizes,
89-
o_strides, r_allocated, r_aligned, r_offset, r_sizes,
90-
r_strides, lo_allocated, lo_aligned)));
85+
this->set_value(std::move(mk_tnsr(
86+
this->guid(), _dtype, this->shape(), this->device(), this->team(),
87+
l_allocated, l_aligned, l_offset, l_sizes, l_strides, o_allocated,
88+
o_aligned, o_offset, o_sizes, o_strides, r_allocated, r_aligned,
89+
r_offset, r_sizes, r_strides, lo_allocated, lo_aligned)));
9190
});
9291
return false;
9392
}
@@ -102,8 +101,8 @@ struct DeferredFull : public Deferred {
102101
};
103102

104103
FutureArray *Creator::full(const shape_type &shape, const py::object &val,
105-
DTypeId dtype, const std::string &device,
106-
uint64_t team) {
104+
DTypeId dtype, const std::string &device,
105+
uint64_t team) {
107106
auto v = mk_scalar(val, dtype);
108107
return new FutureArray(
109108
defer<DeferredFull>(shape, v, dtype, device, mkTeam(team)));
@@ -132,26 +131,26 @@ struct DeferredArange : public Deferred {
132131
auto dtyp = jit::getPTDType(dtype());
133132
auto envs = jit::mkEnvs(builder, rank(), _device, team());
134133

135-
dm.addVal(this->guid(),
136-
builder.create<::imex::ndarray::LinSpaceOp>(loc, start, stop, num,
137-
false, dtyp, envs),
138-
[this](uint64_t rank, void *l_allocated, void *l_aligned,
139-
intptr_t l_offset, const intptr_t *l_sizes,
140-
const intptr_t *l_strides, void *o_allocated,
141-
void *o_aligned, intptr_t o_offset,
142-
const intptr_t *o_sizes, const intptr_t *o_strides,
143-
void *r_allocated, void *r_aligned, intptr_t r_offset,
144-
const intptr_t *r_sizes, const intptr_t *r_strides,
145-
uint64_t *lo_allocated, uint64_t *lo_aligned) {
146-
assert(rank == 1);
147-
assert(o_strides[0] == 1);
148-
this->set_value(std::move(mk_tnsr(
149-
reinterpret_cast<Transceiver *>(this->team()), _dtype,
150-
this->shape(), l_allocated, l_aligned, l_offset, l_sizes,
151-
l_strides, o_allocated, o_aligned, o_offset, o_sizes,
152-
o_strides, r_allocated, r_aligned, r_offset, r_sizes,
153-
r_strides, lo_allocated, lo_aligned)));
154-
});
134+
dm.addVal(
135+
this->guid(),
136+
builder.create<::imex::ndarray::LinSpaceOp>(loc, start, stop, num,
137+
false, dtyp, envs),
138+
[this](uint64_t rank, void *l_allocated, void *l_aligned,
139+
intptr_t l_offset, const intptr_t *l_sizes,
140+
const intptr_t *l_strides, void *o_allocated, void *o_aligned,
141+
intptr_t o_offset, const intptr_t *o_sizes,
142+
const intptr_t *o_strides, void *r_allocated, void *r_aligned,
143+
intptr_t r_offset, const intptr_t *r_sizes,
144+
const intptr_t *r_strides, uint64_t *lo_allocated,
145+
uint64_t *lo_aligned) {
146+
assert(rank == 1);
147+
assert(o_strides[0] == 1);
148+
this->set_value(std::move(mk_tnsr(
149+
this->guid(), _dtype, this->shape(), this->device(), this->team(),
150+
l_allocated, l_aligned, l_offset, l_sizes, l_strides, o_allocated,
151+
o_aligned, o_offset, o_sizes, o_strides, r_allocated, r_aligned,
152+
r_offset, r_sizes, r_strides, lo_allocated, lo_aligned)));
153+
});
155154
return false;
156155
}
157156

@@ -165,8 +164,8 @@ struct DeferredArange : public Deferred {
165164
};
166165

167166
FutureArray *Creator::arange(uint64_t start, uint64_t end, uint64_t step,
168-
DTypeId dtype, const std::string &device,
169-
uint64_t team) {
167+
DTypeId dtype, const std::string &device,
168+
uint64_t team) {
170169
return new FutureArray(
171170
defer<DeferredArange>(start, end, step, dtype, device, mkTeam(team)));
172171
}
@@ -193,26 +192,26 @@ struct DeferredLinspace : public Deferred {
193192
auto dtyp = jit::getPTDType(dtype());
194193
auto envs = jit::mkEnvs(builder, rank(), _device, team());
195194

196-
dm.addVal(this->guid(),
197-
builder.create<::imex::ndarray::LinSpaceOp>(
198-
loc, start, stop, num, _endpoint, dtyp, envs),
199-
[this](uint64_t rank, void *l_allocated, void *l_aligned,
200-
intptr_t l_offset, const intptr_t *l_sizes,
201-
const intptr_t *l_strides, void *o_allocated,
202-
void *o_aligned, intptr_t o_offset,
203-
const intptr_t *o_sizes, const intptr_t *o_strides,
204-
void *r_allocated, void *r_aligned, intptr_t r_offset,
205-
const intptr_t *r_sizes, const intptr_t *r_strides,
206-
uint64_t *lo_allocated, uint64_t *lo_aligned) {
207-
assert(rank == 1);
208-
assert(l_strides[0] == 1);
209-
this->set_value(std::move(mk_tnsr(
210-
reinterpret_cast<Transceiver *>(this->team()), _dtype,
211-
this->shape(), l_allocated, l_aligned, l_offset, l_sizes,
212-
l_strides, o_allocated, o_aligned, o_offset, o_sizes,
213-
o_strides, r_allocated, r_aligned, r_offset, r_sizes,
214-
r_strides, lo_allocated, lo_aligned)));
215-
});
195+
dm.addVal(
196+
this->guid(),
197+
builder.create<::imex::ndarray::LinSpaceOp>(loc, start, stop, num,
198+
_endpoint, dtyp, envs),
199+
[this](uint64_t rank, void *l_allocated, void *l_aligned,
200+
intptr_t l_offset, const intptr_t *l_sizes,
201+
const intptr_t *l_strides, void *o_allocated, void *o_aligned,
202+
intptr_t o_offset, const intptr_t *o_sizes,
203+
const intptr_t *o_strides, void *r_allocated, void *r_aligned,
204+
intptr_t r_offset, const intptr_t *r_sizes,
205+
const intptr_t *r_strides, uint64_t *lo_allocated,
206+
uint64_t *lo_aligned) {
207+
assert(rank == 1);
208+
assert(l_strides[0] == 1);
209+
this->set_value(std::move(mk_tnsr(
210+
this->guid(), _dtype, this->shape(), this->device(), this->team(),
211+
l_allocated, l_aligned, l_offset, l_sizes, l_strides, o_allocated,
212+
o_aligned, o_offset, o_sizes, o_strides, r_allocated, r_aligned,
213+
r_offset, r_sizes, r_strides, lo_allocated, lo_aligned)));
214+
});
216215
return false;
217216
}
218217

@@ -227,10 +226,10 @@ struct DeferredLinspace : public Deferred {
227226
};
228227

229228
FutureArray *Creator::linspace(double start, double end, uint64_t num,
230-
bool endpoint, DTypeId dtype,
231-
const std::string &device, uint64_t team) {
232-
return new FutureArray(defer<DeferredLinspace>(start, end, num, endpoint, dtype,
233-
device, mkTeam(team)));
229+
bool endpoint, DTypeId dtype,
230+
const std::string &device, uint64_t team) {
231+
return new FutureArray(defer<DeferredLinspace>(start, end, num, endpoint,
232+
dtype, device, mkTeam(team)));
234233
}
235234

236235
// ***************************************************************************
@@ -239,8 +238,9 @@ extern DTypeId DEFAULT_FLOAT;
239238
extern DTypeId DEFAULT_INT;
240239

241240
std::pair<FutureArray *, bool> Creator::mk_future(const py::object &b,
242-
const std::string &device,
243-
uint64_t team, DTypeId dtype) {
241+
const std::string &device,
242+
uint64_t team,
243+
DTypeId dtype) {
244244
if (py::isinstance<FutureArray>(b)) {
245245
return {b.cast<FutureArray *>(), false};
246246
} else if (py::isinstance<py::float_>(b) || py::isinstance<py::int_>(b)) {

src/Deferred.cpp

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ extern tbb::concurrent_bounded_queue<Runable::ptr_type> _deferred;
3333
// if needed, object/promise is broadcasted to worker processes
3434
// (for controller/worker mode)
3535
void _dist(const Runable *p) {
36-
if (getTransceiver()->is_cw() && getTransceiver()->rank() == 0)
36+
if (getTransceiver() && getTransceiver()->is_cw() &&
37+
getTransceiver()->rank() == 0)
3738
getMediator()->to_workers(p);
3839
}
3940

@@ -68,9 +69,7 @@ Deferred::future_type defer_array(Runable::ptr_type &&_d, bool is_global) {
6869
}
6970

7071
// defer a global array producer
71-
void Deferred::defer(Runable::ptr_type &&p) {
72-
defer_array(std::move(p), true);
73-
}
72+
void Deferred::defer(Runable::ptr_type &&p) { defer_array(std::move(p), true); }
7473

7574
void Runable::defer(Runable::ptr_type &&p) { push_runable(std::move(p)); }
7675

@@ -94,6 +93,7 @@ void process_promises() {
9493

9594
bool done = false;
9695
jit::JIT jit;
96+
std::vector<Runable::ptr_type> deleters;
9797

9898
do {
9999
::mlir::OpBuilder builder(&jit.context());
@@ -126,28 +126,42 @@ void process_promises() {
126126
std::vector<Runable::ptr_type> runables;
127127

128128
jit::DepManager dm(function);
129-
130129
Runable::ptr_type d;
131-
while (true) {
132-
VT(VT_begin, vtPopSym);
133-
_deferred.pop(d);
134-
VT(VT_end, vtPopSym);
135-
if (d) {
136-
if (d->generate_mlir(builder, loc, dm)) {
130+
131+
if (!deleters.empty()) {
132+
for (auto &dl : deleters) {
133+
if (dl->generate_mlir(builder, loc, dm)) {
134+
assert(!"deleters must generate MLIR");
135+
}
136+
runables.emplace_back(std::move(dl));
137+
}
138+
deleters.clear();
139+
} else {
140+
while (true) {
141+
VT(VT_begin, vtPopSym);
142+
_deferred.pop(d);
143+
VT(VT_end, vtPopSym);
144+
if (d) {
145+
if (d->isDeleter()) {
146+
deleters.emplace_back(std::move(d));
147+
} else {
148+
if (d->generate_mlir(builder, loc, dm)) {
149+
break;
150+
};
151+
// keep alive for later set_value
152+
runables.emplace_back(std::move(d));
153+
}
154+
} else {
155+
// signals system shutdown
156+
done = true;
137157
break;
138-
};
139-
// keep alive for later set_value
140-
runables.push_back(std::move(d));
141-
} else {
142-
// signals system shutdown
143-
done = true;
144-
break;
158+
}
145159
}
146160
}
147161

148162
if (!runables.empty()) {
149163
// get input buffers (before results!)
150-
auto input = std::move(dm.store_inputs());
164+
auto input = std::move(dm.finalize_inputs());
151165
// create return statement and adjust function type
152166
uint64_t osz = dm.handleResult(builder);
153167
// also request generation of c-wrapper function

0 commit comments

Comments
 (0)