Skip to content

Commit 9d296e7

Browse files
committed
mem api POC
1 parent 7857df6 commit 9d296e7

35 files changed

+924
-63
lines changed

FlameGraph

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 41fee1f99f9276008b7cd112fca19dc3ea84ac32

bench.sh

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
#!/bin/bash
#
# Copyright (C) 2025 Intel Corporation
#
# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Profiles an application with `perf` and renders the samples as a flame graph.
#
# Usage:   bench.sh <application> [args...]
# Outputs: perf.data, out.perf, out.folded and flamegraph.svg in the current
#          directory.

# Check if at least one argument is provided
if [ "$#" -lt 1 ]; then
    echo "Usage: $0 <application> [args...]"
    exit 1
fi

# Variables
APP="$1" # The application to run
shift    # Remove the application from the arguments list; "$@" now holds
         # only the application's own arguments (kept as separate words)

# The user to own the generated perf data: the invoking user, also when the
# whole script is started through sudo.
PERF_DATA_OWNER="${SUDO_USER:-$(id -un)}"
FLAMEGRAPH_DIR="./FlameGraph" # Path to the FlameGraph repository

# Check if FlameGraph repository exists
if [ ! -d "$FLAMEGRAPH_DIR" ]; then
    echo "Error: FlameGraph directory not found at $FLAMEGRAPH_DIR."
    echo "Clone it using: git clone https://github.com/brendangregg/FlameGraph.git"
    exit 1
fi

# Run application under perf.
# --call-graph already enables call-graph recording, so no extra -g is needed.
# "$@" (quoted) passes every argument through unchanged, including arguments
# that contain spaces or glob characters.
echo "Recording performance data..."
sudo perf record -F 99 --call-graph dwarf -- "$APP" "$@"

# The exit status of `perf record` is not checked (the profiled application
# may legitimately exit with a non-zero status); what matters is that the
# samples were written.
if [ ! -f perf.data ]; then
    echo "Error: perf did not produce perf.data."
    exit 1
fi

# Change ownership of the generated perf data
echo "Changing ownership of perf data..."
sudo chown "$PERF_DATA_OWNER" perf.data || exit 1

# Process perf.data into a readable format
echo "Processing perf data..."
perf script > out.perf || {
    echo "Error: perf script failed."
    exit 1
}

# Generate folded stacks
echo "Generating folded stacks..."
"$FLAMEGRAPH_DIR/stackcollapse-perf.pl" out.perf > out.folded || {
    echo "Error: stackcollapse-perf.pl failed."
    exit 1
}

# Generate the flame graph
echo "Generating flame graph..."
"$FLAMEGRAPH_DIR/flamegraph.pl" out.folded > flamegraph.svg || {
    echo "Error: flamegraph.pl failed."
    exit 1
}

# Open the flame graph in Firefox (skipped when Firefox is not installed)
if command -v firefox > /dev/null 2>&1; then
    echo "Opening flame graph in Firefox..."
    firefox flamegraph.svg &
fi

echo "Done! The flame graph is saved as flamegraph.svg."
55+
56+

benchmark/ubench.c

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include <umf/ipc.h>
1717
#include <umf/memory_pool.h>
18+
#include <umf/memory_props.h>
1819
#include <umf/pools/pool_disjoint.h>
1920
#include <umf/pools/pool_proxy.h>
2021
#include <umf/pools/pool_scalable.h>
@@ -438,6 +439,47 @@ static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs,
438439
}
439440
}
440441

442+
// Benchmark kernel: looks up the memory type of every buffer in `allocs`,
// either through the UMF memory-properties API (use_umf == true) or through
// utils_ze_get_mem_props() (use_umf == false).
//
// context    - Level Zero context, must not be NULL (only used by the
//              non-UMF path)
// use_umf    - selects which of the two query paths is exercised
// allocs     - array of buffers to query
// num_allocs - number of entries in `allocs`
// repeats    - the whole array is walked `repeats * 10` times
//
// Any buffer that is not reported as device memory ends the process with
// exit(-1).
static void do_umf_mem_props_benchmark(ze_context_handle_t context,
                                       bool use_umf, alloc_t *allocs,
                                       size_t num_allocs, size_t repeats) {
    assert(context != NULL);

    const size_t rounds = repeats * 10;

    for (size_t round = 0; round < rounds; ++round) {
        for (size_t idx = 0; idx < num_allocs; ++idx) {
            alloc_t *entry = &allocs[idx];

            if (!use_umf) {
                ze_memory_allocation_properties_t ze_props = {0};
                ze_device_handle_t ze_device = 0;

                // calls zeMemGetAllocProperties()
                utils_ze_get_mem_props(context, entry->ptr, &ze_props,
                                       &ze_device);
                if (ze_props.type != ZE_MEMORY_TYPE_DEVICE) {
                    fprintf(stderr,
                            "error: unexpected alloc_props.type value: %d\n",
                            ze_props.type);
                    exit(-1);
                }
                continue;
            }

            // UMF path: first fetch the properties handle of the pointer ...
            umf_memory_properties_handle_t handle = NULL;
            umf_result_t status =
                umfGetMemoryPropertiesHandle(entry->ptr, &handle);
            (void)status; // keeps builds without asserts warning-free
            assert(status == UMF_RESULT_SUCCESS);

            // ... then read the pointer type through that handle.
            umf_usm_memory_type_t mem_type = UMF_MEMORY_TYPE_UNKNOWN;
            status = umfGetMemoryProperty(
                handle, UMF_MEMORY_PROPERTY_POINTER_TYPE, &mem_type);
            assert(status == UMF_RESULT_SUCCESS);
            if (mem_type != UMF_MEMORY_TYPE_DEVICE) {
                fprintf(stderr,
                        "error: unexpected alloc_props.type value: %d\n",
                        mem_type);
                exit(-1);
            }
        }
    }
}
482+
441483
static int create_level_zero_params(ze_context_handle_t *context,
442484
ze_device_handle_t *device) {
443485
uint32_t driver_idx = 0;
@@ -623,6 +665,198 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) {
623665
err_destroy_context:
624666
utils_ze_destroy_context(context);
625667
}
668+
669+
UBENCH_EX(mem_props, level_zero) {
670+
const size_t BUFFER_SIZE = 100;
671+
const size_t N_BUFFERS = 1000;
672+
673+
alloc_t *allocs = alloc_array(N_BUFFERS);
674+
if (allocs == NULL) {
675+
fprintf(stderr, "error: alloc_array() failed\n");
676+
}
677+
678+
ze_context_handle_t context = NULL;
679+
ze_device_handle_t device = NULL;
680+
int ret = create_level_zero_params(&context, &device);
681+
if (ret != 0) {
682+
fprintf(stderr, "error: create_level_zero_params() failed\n");
683+
exit(-1);
684+
}
685+
686+
ze_device_mem_alloc_desc_t dev_desc = {
687+
.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
688+
.pNext = NULL,
689+
.flags = 0,
690+
.ordinal = 0};
691+
692+
for (size_t i = 0; i < N_BUFFERS; ++i) {
693+
ze_result_t ze_result = zeMemAllocDevice(
694+
context, &dev_desc, BUFFER_SIZE, 0, device, &allocs[i].ptr);
695+
if (ze_result != ZE_RESULT_SUCCESS) {
696+
fprintf(stderr, "error: zeMemAllocDevice() failed\n");
697+
}
698+
allocs[i].size = BUFFER_SIZE;
699+
}
700+
701+
do_umf_mem_props_benchmark(context, false, allocs, N_BUFFERS,
702+
1); // WARMUP
703+
UBENCH_DO_BENCHMARK() {
704+
do_umf_mem_props_benchmark(context, false, allocs, N_BUFFERS,
705+
N_ITERATIONS);
706+
}
707+
708+
for (size_t i = 0; i < N_BUFFERS; ++i) {
709+
zeMemFree(context, allocs[i].ptr);
710+
}
711+
712+
free(allocs);
713+
utils_ze_destroy_context(context);
714+
}
715+
716+
UBENCH_EX(mem_props, disjoint_pool_with_level_zero_provider_use_umf) {
717+
const size_t BUFFER_SIZE = 4 * 1024;
718+
const size_t N_BUFFERS = 1000;
719+
umf_result_t umf_result;
720+
ze_context_handle_t context = NULL;
721+
ze_device_handle_t device = NULL;
722+
umf_level_zero_memory_provider_params_handle_t level_zero_params = NULL;
723+
724+
int ret = create_level_zero_params(&context, &device);
725+
if (ret != 0) {
726+
fprintf(stderr, "error: create_level_zero_params() failed\n");
727+
exit(-1);
728+
}
729+
730+
umf_result = umfLevelZeroMemoryProviderParamsCreate(&level_zero_params);
731+
if (umf_result != UMF_RESULT_SUCCESS) {
732+
fprintf(stderr,
733+
"error: umfLevelZeroMemoryProviderParamsCreate() failed\n");
734+
goto err_destroy_context;
735+
}
736+
737+
umf_result =
738+
umfLevelZeroMemoryProviderParamsSetContext(level_zero_params, context);
739+
if (umf_result != UMF_RESULT_SUCCESS) {
740+
fprintf(stderr,
741+
"error: umfLevelZeroMemoryProviderParamsSetContext() failed\n");
742+
goto err_destroy_params;
743+
}
744+
745+
umf_result =
746+
umfLevelZeroMemoryProviderParamsSetDevice(level_zero_params, device);
747+
if (umf_result != UMF_RESULT_SUCCESS) {
748+
fprintf(stderr,
749+
"error: umfLevelZeroMemoryProviderParamsSetDevice() failed\n");
750+
goto err_destroy_params;
751+
}
752+
753+
umf_result = umfLevelZeroMemoryProviderParamsSetMemoryType(
754+
level_zero_params, UMF_MEMORY_TYPE_DEVICE);
755+
if (umf_result != UMF_RESULT_SUCCESS) {
756+
fprintf(
757+
stderr,
758+
"error: umfLevelZeroMemoryProviderParamsSetMemoryType() failed\n");
759+
goto err_destroy_params;
760+
}
761+
762+
alloc_t *allocs = alloc_array(N_BUFFERS);
763+
if (allocs == NULL) {
764+
fprintf(stderr, "error: alloc_array() failed\n");
765+
goto err_destroy_context;
766+
}
767+
768+
umf_memory_provider_handle_t provider = NULL;
769+
umf_result = umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(),
770+
level_zero_params, &provider);
771+
if (umf_result != UMF_RESULT_SUCCESS) {
772+
fprintf(stderr, "error: umfMemoryProviderCreate() failed\n");
773+
goto err_free_allocs;
774+
}
775+
776+
umf_disjoint_pool_params_handle_t disjoint_params = NULL;
777+
umf_result = umfDisjointPoolParamsCreate(&disjoint_params);
778+
if (umf_result != UMF_RESULT_SUCCESS) {
779+
fprintf(stderr, "ERROR: umfDisjointPoolParamsCreate failed\n");
780+
goto err_provider_destroy;
781+
}
782+
783+
umf_result =
784+
umfDisjointPoolParamsSetSlabMinSize(disjoint_params, BUFFER_SIZE * 10);
785+
if (umf_result != UMF_RESULT_SUCCESS) {
786+
fprintf(stderr,
787+
"error: umfDisjointPoolParamsSetSlabMinSize() failed\n");
788+
goto err_params_destroy;
789+
}
790+
791+
umf_result = umfDisjointPoolParamsSetMaxPoolableSize(
792+
disjoint_params, 4ull * 1024ull * 1024ull);
793+
if (umf_result != UMF_RESULT_SUCCESS) {
794+
fprintf(stderr,
795+
"error: umfDisjointPoolParamsSetMaxPoolableSize() failed\n");
796+
goto err_params_destroy;
797+
}
798+
799+
umf_result =
800+
umfDisjointPoolParamsSetCapacity(disjoint_params, 64ull * 1024ull);
801+
if (umf_result != UMF_RESULT_SUCCESS) {
802+
fprintf(stderr, "error: umfDisjointPoolParamsSetCapacity() failed\n");
803+
goto err_params_destroy;
804+
}
805+
806+
umf_result = umfDisjointPoolParamsSetMinBucketSize(disjoint_params, 64);
807+
if (umf_result != UMF_RESULT_SUCCESS) {
808+
fprintf(stderr,
809+
"error: umfDisjointPoolParamsSetMinBucketSize() failed\n");
810+
goto err_params_destroy;
811+
}
812+
813+
umf_pool_create_flags_t flags = UMF_POOL_CREATE_FLAG_NONE;
814+
umf_memory_pool_handle_t pool;
815+
umf_result = umfPoolCreate(umfDisjointPoolOps(), provider, disjoint_params,
816+
flags, &pool);
817+
if (umf_result != UMF_RESULT_SUCCESS) {
818+
fprintf(stderr, "error: umfPoolCreate() failed\n");
819+
goto err_params_destroy;
820+
}
821+
822+
for (size_t i = 0; i < N_BUFFERS; ++i) {
823+
allocs[i].ptr = umfPoolMalloc(pool, BUFFER_SIZE);
824+
if (allocs[i].ptr == NULL) {
825+
goto err_buffer_destroy;
826+
}
827+
allocs[i].size = BUFFER_SIZE;
828+
}
829+
830+
do_umf_mem_props_benchmark(context, true, allocs, N_BUFFERS,
831+
1); // WARMUP
832+
UBENCH_DO_BENCHMARK() {
833+
do_umf_mem_props_benchmark(context, true, allocs, N_BUFFERS,
834+
N_ITERATIONS);
835+
}
836+
837+
err_buffer_destroy:
838+
for (size_t i = 0; i < N_BUFFERS; ++i) {
839+
umfPoolFree(pool, allocs[i].ptr);
840+
}
841+
842+
umfPoolDestroy(pool);
843+
844+
err_params_destroy:
845+
umfDisjointPoolParamsDestroy(disjoint_params);
846+
847+
err_provider_destroy:
848+
umfMemoryProviderDestroy(provider);
849+
850+
err_free_allocs:
851+
free(allocs);
852+
853+
err_destroy_params:
854+
umfLevelZeroMemoryProviderParamsDestroy(level_zero_params);
855+
856+
err_destroy_context:
857+
//utils_ze_destroy_context(context);
858+
}
859+
626860
#endif /* (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */
627861

628862
// TODO add IPC benchmark for CUDA

docs/config/api.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ memory as well as functions that create, destroy and operate on the pool.
2828
.. doxygenfile:: memory_pool.h
2929
:sections: define enum typedef func var
3030

31+
Memory Properties
32+
------------------------------------------
33+
34+
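Experimental API for querying the properties of a memory allocation (for example its memory type) from a pointer. TODO: extend this description.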
35+
36+
.. doxygenfile:: memory_props.h
37+
:sections: define enum typedef func var
38+
3139
Disjoint Pool
3240
------------------------------------------
3341

include/umf/base.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,40 @@ typedef enum umf_result_t {
5151
UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown error
5252
} umf_result_t;
5353

54+
/// @brief Handle to the memory properties of an allocation; passed to umfGetMemoryProperty() to read individual properties
55+
typedef struct umf_memory_properties_t *umf_memory_properties_handle_t;
56+
57+
/// @brief Identifiers of the memory properties that can be queried with umfGetMemoryProperty()
58+
// TODO: document that this API is experimental
59+
typedef enum umf_memory_property_id_t {
60+
UMF_MEMORY_PROPERTY_INVALID = -1, ///< Invalid property id
61+
62+
// UMF specific
63+
UMF_MEMORY_PROVIDER_HANDLE,
64+
UMF_MEMORY_PROVIDER_OPS, // == type?
65+
UMF_MEMORY_POOL_HANDLE,
66+
UMF_MEMORY_POOL_OPS, // == type?
67+
68+
// generic pointer properties
69+
UMF_MEMORY_PROPERTY_POINTER_TYPE, // open question: unregistered host, registered host (?), device, managed - or umf_usm_memory_type_t?
70+
UMF_MEMORY_PROPERTY_BASE_ADDRESS, // base address
71+
UMF_MEMORY_PROPERTY_BASE_SIZE, // base size
72+
73+
// GPU specific
74+
UMF_MEMORY_PROPERTY_CONTEXT,
75+
UMF_MEMORY_PROPERTY_DEVICE, // handle (ze) or id (cuda)
76+
UMF_MEMORY_PROPERTY_BUFFER_ID, // unique id; NOTE: this id is unique across all UMF allocations and is not the same as the L0 or CUDA id
77+
UMF_MEMORY_PROPERTY_DEVICE_ATTRIBUTES, // ze_memory_allocation_properties_t ?
78+
79+
// TODO: all CUDA + L0 properties
80+
// TODO: next, other providers?
81+
// TODO: return type?
82+
83+
/// @cond
84+
UMF_MEMORY_PROPERTY_MAX_RESERVED = 0x1000, ///< Maximum reserved value
85+
/// @endcond
86+
} umf_memory_property_id_t;
87+
5488
/// @brief Type of the CTL query
5589
typedef enum umf_ctl_query_type {
5690
CTL_QUERY_READ,

0 commit comments

Comments
 (0)