-
Notifications
You must be signed in to change notification settings - Fork 112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: Hack to optimise based on known sizes #276
base: snmalloc1
Are you sure you want to change the base?
Changes from all commits
7944d8f
44374b5
8b294b2
57dfeb1
28c9d09
d48fd1e
f132034
d2166fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1040,7 +1040,8 @@ namespace snmalloc | |
allow_reserve == NoReserve ? "noreserve" : "reserve")); | ||
|
||
SNMALLOC_ASSUME(size <= SLAB_SIZE); | ||
sizeclass_t sizeclass = size_to_sizeclass(size); | ||
SNMALLOC_ASSUME(size > 0); | ||
sizeclass_t sizeclass = size_to_sizeclass<true>(size); | ||
return small_alloc_inner<zero_mem, allow_reserve>(sizeclass, size); | ||
} | ||
|
||
|
@@ -1066,6 +1067,13 @@ namespace snmalloc | |
return p; | ||
} | ||
|
||
return small_alloc_inner_slow<zero_mem, allow_reserve>(sizeclass, size); | ||
} | ||
|
||
template<ZeroMem zero_mem, AllowReserve allow_reserve> | ||
SNMALLOC_SLOW_PATH void* | ||
small_alloc_inner_slow(sizeclass_t sizeclass, size_t size) | ||
{ | ||
if (likely(!has_messages())) | ||
return small_alloc_next_free_list<zero_mem, allow_reserve>( | ||
sizeclass, size); | ||
|
@@ -1228,6 +1236,20 @@ namespace snmalloc | |
small_dealloc_offseted_inner(super, p, sizeclass); | ||
} | ||
|
||
/**
 * Attempts to release `p` without going through a thread-local allocator.
 *
 * Looks up the superslab and slab for `p` and hands both to
 * `Slab::dealloc_fast`. The (branch-hinted) result of that call is returned
 * unchanged; callers in this change treat `false` as "fall back to
 * `small_local_dealloc_slow`".
 */
static SNMALLOC_FAST_PATH bool small_local_dealloc(void* p)
{
  auto owning_superslab = Superslab::get(p);
  Slab* owning_slab = Metaslab::get_slab(p);

  // The fast path is expected to succeed in the common case.
  const bool handled = likely(owning_slab->dealloc_fast(owning_superslab, p));
  return handled;
}
|
||
SNMALLOC_FAST_PATH void small_local_dealloc_slow(void* p) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo |
||
{ | ||
auto super = Superslab::get(p); | ||
Slab* slab = Metaslab::get_slab(p); | ||
small_dealloc_offseted_slow(super, p, slab->get_meta().sizeclass); | ||
} | ||
|
||
SNMALLOC_FAST_PATH void small_dealloc_offseted_inner( | ||
Superslab* super, void* p, sizeclass_t sizeclass) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,6 +69,33 @@ extern "C" | |
return ThreadAlloc::get_noncachable()->alloc<ZeroMem::YesZero>(sz); | ||
} | ||
|
||
SNMALLOC_EXPORT | ||
void SNMALLOC_NAME_MANGLE(free_local_small)(void* ptr) | ||
{ | ||
if (Alloc::small_local_dealloc(ptr)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I'd feel slightly better if this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It has to at least be in ThreadAlloc, as we don't want to take the TLS lookup on the fast path. So we need that in scope. I think we probably need a better refactor if we actually want to support this design. This is more a hack to get some codegen, and see if the use case makes sense. |
||
return; | ||
ThreadAlloc::get_noncachable()->small_local_dealloc_slow(ptr); | ||
} | ||
|
||
/**
 * Exported entry point that forwards `size` straight to the allocator's
 * `small_alloc<NoZero, YesReserve>`, skipping the general `alloc` dispatch.
 */
SNMALLOC_EXPORT
void* SNMALLOC_NAME_MANGLE(malloc_small)(size_t size)
{
  auto&& allocator = ThreadAlloc::get_noncachable();
  return allocator->small_alloc<NoZero, YesReserve>(size);
}
|
||
/**
 * Exported entry point that requests a fixed 64-byte allocation via
 * `small_alloc<NoZero, YesReserve>`. The size is a literal so the compiler
 * can specialise the call for it.
 */
SNMALLOC_EXPORT
void* SNMALLOC_NAME_MANGLE(malloc_small_64)()
{
  auto&& allocator = ThreadAlloc::get_noncachable();
  return allocator->small_alloc<NoZero, YesReserve>(64);
}
|
||
/**
 * Exported entry point that requests a fixed 63-byte allocation via
 * `small_alloc<NoZero, YesReserve>`. The size is a literal so the compiler
 * can specialise the call for it.
 */
SNMALLOC_EXPORT
void* SNMALLOC_NAME_MANGLE(malloc_small_63)()
{
  auto&& allocator = ThreadAlloc::get_noncachable();
  return allocator->small_alloc<NoZero, YesReserve>(63);
}
|
||
SNMALLOC_EXPORT | ||
size_t SNMALLOC_NAME_MANGLE(malloc_usable_size)( | ||
MALLOC_USABLE_SIZE_QUALIFIER void* ptr) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#include "snmalloc.h" | ||
|
||
#include <fstream> | ||
#include <iostream> | ||
|
||
int main(int argc, char* argv[]) | ||
{ | ||
if (argc != 2) | ||
{ | ||
std::cerr << "Call with output file name" << std::endl; | ||
return 1; | ||
} | ||
|
||
// open a file in write mode. | ||
ofstream outfile; | ||
outfile.open(argv[1]); | ||
|
||
for (size_t align = 0; align < 10; align++) | ||
{ | ||
for (size_t size = 1024; size > 0; size -= 16) | ||
{ | ||
auto asize = snmalloc::aligned_size(1ULL << align, size); | ||
auto sizeclass = snmalloc::size_to_sizeclass(asize); | ||
auto rsize = snmalloc::sizeclass_to_size(sizeclass); | ||
if (rsize == size && align == 0) | ||
{ | ||
outfile << "DEFINE_MALLOC_SIZE(__stack_alloc_small_" << size << "_" << align << ", " << size | ||
<< ");" << std::endl; | ||
} | ||
else | ||
{ | ||
outfile << "REDIRECT_MALLOC_SIZE(__stack_alloc_small_" << size << "_" << align << ", __stack_alloc_small_" | ||
<< rsize << "_" << 0 << ");" << std::endl; | ||
} | ||
outfile << "GENERATE_FREE_SIZE(__stack_free_small_" << size << "_" << align << ");" << std::endl; | ||
|
||
} | ||
} | ||
|
||
outfile.close(); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
|
||
#include "snmalloc.h" | ||
|
||
// NAME(a): pastes `a` onto the `malloc_size_` prefix to form an identifier.
#define NAME(a) malloc_size_##a
// STRINGIFY(a): turns `a` into a string literal. The extra level of
// indirection lets macro arguments be expanded before they are stringified.
#define STRINGIFY_IMPL(a) #a
#define STRINGIFY(a) STRINGIFY_IMPL(a)
// NAME_STRING(a): the identifier produced by NAME(a), as a string literal.
#define NAME_STRING(a) STRINGIFY(NAME(a))
|
||
// REDIRECT_MALLOC_SIZE(a, b): declares the C-linkage symbol `a` and makes it
// resolve to the already-defined symbol `b`.
//  - WIN32: uses the linker's /alternatename option. The symbol names must be
//    stringified with `#` and joined by string-literal concatenation; `##`
//    inside a string literal is not substituted.
//  - Otherwise: uses the GNU `alias` attribute. The attribute has to follow
//    the `extern "C"` linkage specification, and `b` must be defined in the
//    same translation unit.
// The macro deliberately has no trailing semicolon; the use site supplies it.
#ifdef WIN32
#  define REDIRECT_MALLOC_SIZE(a, b) \
    extern "C" void* a(); \
    __pragma(comment(linker, "/alternatename:" #a "=" #b))
#else
#  define REDIRECT_MALLOC_SIZE(a, b) \
    extern "C" __attribute__((alias(#b))) void* a()
#endif
|
||
// DEFINE_MALLOC_SIZE(name, s): defines a C-linkage, zero-argument function
// `name` that returns the result of `alloc<s>()` on the allocator obtained
// from ThreadAlloc::get_noncachable(). `s` is passed as a template argument,
// so each generated function is specialised for its size.
#define DEFINE_MALLOC_SIZE(name, s) \
  extern "C" void* name() \
  { \
    auto&& allocator = snmalloc::ThreadAlloc::get_noncachable(); \
    return allocator->template alloc<s>(); \
  }
|
||
extern "C" void free_local_small(void* ptr) | ||
{ | ||
if (snmalloc::Alloc::small_local_dealloc(ptr)) | ||
return; | ||
snmalloc::ThreadAlloc::get_noncachable()->small_local_dealloc_slow(ptr); | ||
} | ||
|
||
// GENERATE_FREE_SIZE(a): declares the C-linkage symbol `a` and makes it
// resolve to `free_local_small`, so every size-specific free entry point
// shares one implementation.
//  - The declared signature matches free_local_small: `void (void*)`.
//  - WIN32: uses the linker's /alternatename option; the symbol name is
//    stringified with `#` because `##` inside a string literal is not
//    substituted.
//  - Otherwise: uses the GNU `alias` attribute, placed after the
//    `extern "C"` linkage specification. free_local_small must be defined
//    earlier in the same translation unit.
// The macro deliberately has no trailing semicolon; the use site supplies it.
#ifdef WIN32
#  define GENERATE_FREE_SIZE(a) \
    extern "C" void a(void* ptr); \
    __pragma(comment(linker, "/alternatename:" #a "=free_local_small"))
#else
#  define GENERATE_FREE_SIZE(a) \
    extern "C" __attribute__((alias("free_local_small"))) void a(void* ptr)
#endif
|
||
void* __stack_alloc_large(size_t size, size_t align) | ||
{ | ||
size_t asize = snmalloc::aligned_size(align, size); | ||
return snmalloc::ThreadAlloc::get_noncachable()->alloc(asize); | ||
} | ||
|
||
void __stack_free_large(void* ptr, size_t size, size_t align) | ||
{ | ||
size_t asize = snmalloc::aligned_size(align, size); | ||
snmalloc::ThreadAlloc::get_noncachable()->dealloc(ptr, asize); | ||
} | ||
|
||
#include "generated.cc" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to differentiate between executables for the build host and executables for the target when we're cross-compiling?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks possible:
https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/CrossCompiling#using-executables-in-the-build-created-during-the-build
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needing to build a C++ program on the host to generate things that we compile for the target is a bit painful for cross compiling (do we even guarantee that the sizes will be the same if, for example, we're compiling on a 32-bit system for a CHERI target?).
It's also going to be annoying to integrate into a libc build system.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently, all platforms and configurations we build would use the same file. However, we can use this program to build a collection of headers that encode the parameters of interest, e.g. of the form
Currently, everything would use exactly the same thing:
But for CHERI, we might want to up the minimum allocation size. Getting either parameter wrong in the header would "work", however, we might
Perhaps just check the file in once we have finished experimenting. While we are experimenting, I think having this generated is good, as it means we are less likely to make mistakes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note, we use the template parameter for `s`, so the code will be specialised for this size, even if it ends up being a medium alloc, or large.