Skip to content

Commit

Permalink
Compute graph folding
Browse files Browse the repository at this point in the history
  • Loading branch information
MarioSieg committed Feb 13, 2025
1 parent 29987b4 commit 624fbed
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 60 deletions.
250 changes: 204 additions & 46 deletions magnetron/magnetron.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,134 @@ mag_static_assert(sizeof(mag_bitset_t) == 4);
#define mag_bitset_clear(sets, i) (sets[(i)>>5]&=~(1u<<((i)&((4<<3)-1))))
#define mag_bitset_toggle(sets, i) (sets[(i)>>5]^=(1u<<((i)&((4<<3)-1))))

typedef struct mag_hashset_t {
size_t len;
mag_bitset_t* used;
const mag_tensor_t** keys;
} mag_hashset_t;
#define MAG_HASHSET_FULL ((size_t)-1)
#define MAG_HASHSET_DUPLICATE ((size_t)-2)
#define MAG_HASHSET_MAX ((size_t)-3) /* Must be last. */
#define mag_hashset_hash_fn(ptr) ((size_t)(uintptr_t)(ptr)>>3)

static size_t mag_hashset_compute_hash_size(size_t sz) {
mag_assert2(sz > 0 && sz < MAG_HASHSET_MAX);
static const size_t prime_lut[] = {
2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
2053, 4099, 8209, 16411, 32771, 65537, 131101,
262147, 524309, 1048583, 2097169, 4194319, 8388617,
16777259, 33554467, 67108879, 134217757, 268435459,
536870923, 1073741827, 2147483659
};
size_t l = 0;
size_t r = sizeof(prime_lut)/sizeof(*prime_lut);
while (l < r) { /* Binary search for the smallest prime > sz. */
size_t mid = (l+r)>>1;
if (prime_lut[mid] < sz) l = mid+1;
else r = mid;
}
return l < sizeof(prime_lut)/sizeof(*prime_lut) ? prime_lut[l] : sz|1;
}
static mag_hashset_t mag_hashset_init(size_t size) {
size = mag_hashset_compute_hash_size(size);
mag_hashset_t set = {
.len = size,
.used = (*mag_alloc)(NULL, mag_bitset_size(size)*sizeof(*set.used)),
.keys = (*mag_alloc)(NULL, size*sizeof(*set.keys)),
};
memset(set.used, 0, mag_bitset_size(size)*sizeof(*set.used));
return set;
}
static size_t mag_hashset_lookup(mag_hashset_t* set, const mag_tensor_t* key) {
size_t k = mag_hashset_hash_fn(key) % set->len, i = k;
while (mag_bitset_get(set->used, i) && set->keys[i] != key) { /* Linear probing. */
i = (i+1) % set->len;
if (i == k) return MAG_HASHSET_FULL;
}
return i;
}
static bool mag_hashset_contains_key(mag_hashset_t* set, const mag_tensor_t* key) {
size_t i = mag_hashset_lookup(set, key);
return mag_bitset_get(set->used, i) && i != MAG_HASHSET_FULL;
}
static size_t mag_hashset_insert(mag_hashset_t* set, const mag_tensor_t* key) {
size_t k = mag_hashset_hash_fn(key) % set->len, i = k;
do { /* Simple linear probing */
if (!mag_bitset_get(set->used, i)) { /* Insert key. */
mag_bitset_set(set->used, i);
set->keys[i] = key;
return i;
}
if (set->keys[i] == key) return MAG_HASHSET_DUPLICATE; /* Key already exists. */
i = (i+1) % set->len;
} while (i != k);
mag_panic("Insertion target not found");
}
static void mag_hashset_reset(mag_hashset_t* set) {
memset(set->used, 0, mag_bitset_size(set->len)*sizeof(*set->used));
}
static void mag_hashset_free(mag_hashset_t* set) {
(*mag_alloc)(set->used, 0);
(*mag_alloc)(set->keys, 0);
}

static void mag_compute_graph_fold_impl(
const mag_tensor_t* node,
mag_hashset_t* visited,
const mag_tensor_t** nodes,
const mag_tensor_t** leafs,
size_t* n_nodes,
size_t* n_leafs,
bool forward,
size_t cap
) {
if (!node) return;
if (mag_hashset_insert(visited, node) == MAG_HASHSET_FULL) return; /* Already visited */
for (unsigned i = 0; i < MAG_MAX_INPUT_TENSORS; ++i) {
unsigned k = forward ? i : MAG_MAX_INPUT_TENSORS-i-1;
if (node->op_inputs[k]) /* Recurse to parent nodes */
mag_compute_graph_fold_impl(node->op_inputs[k], visited, nodes, leafs, n_nodes, n_leafs, forward, cap);
}
if (node->op == MAG_OP_NOP) { /* Leaf node */
mag_assert(*n_leafs < cap, "Graph fold cap exhausted: %zu, increase cap", cap);
leafs[(*n_leafs)++] = node;
} else { /* Connected node */
mag_assert(*n_nodes < cap, "Graph fold cap exhausted: %zu, increase cap", cap);
nodes[(*n_nodes)++] = node;
}
}

static void mag_compute_graph_fold(
size_t cap,
const mag_tensor_t* root,
void (*f)(const mag_tensor_t** nodes, size_t n_nodes, const mag_tensor_t** leafs, size_t n_leafs, void* ud),
bool forward,
void* ud
) {
mag_hashset_t visited = mag_hashset_init(cap);
size_t n_nodes = 0, n_leafs = 0;
const mag_tensor_t** nodes = (*mag_alloc)(NULL, sizeof(*nodes)*cap);
const mag_tensor_t** leafs = (*mag_alloc)(NULL, sizeof(*leafs)*cap);
memset(nodes, 0, sizeof(*nodes)*cap);
memset(leafs, 0, sizeof(*leafs)*cap);
mag_compute_graph_fold_impl(root, &visited, nodes, leafs, &n_nodes, &n_leafs, forward, cap);
(*f)(nodes, n_nodes, leafs, n_leafs, ud);
(*mag_alloc)(nodes, 0);
(*mag_alloc)(leafs, 0);
mag_hashset_free(&visited);
}

static bool mag_compute_graph_contains(
const mag_tensor_t** nodes,
size_t n_nodes,
const mag_tensor_t* node
) {
for (size_t i=0; i < n_nodes; ++i)
if (nodes[i] == node)
return true;
return false;
}

/* Eval Chebyshev coeffs steps for some x. f(x) : [a, b] -> ℝ. */
static double mag_chebyshev_eval(double x, double a, double b, const double* coeffs, uint32_t steps) {
double scale = 4.0/(b - a);
Expand Down Expand Up @@ -2686,9 +2814,9 @@ void mag_tensor_img_draw_box(mag_tensor_t* t, int32_t x1, int32_t y1, int32_t x2
mag_assert(t->rank == 3, "Tensor must be 3D image tensor");
mag_assert2(x2 > x1 && y2 > y1 && x1 > 0 && y1 > 0 && x2 > 0 && y2 > 0);
float* buf = mag_tensor_data_ptr(t);
int32_t w = (int32_t)mag_tensor_image_width(t);
int32_t h = (int32_t)mag_tensor_image_height(t);
int32_t c = (int32_t)mag_tensor_image_channels(t);
int32_t w = (int32_t)mag_tensor_width(t);
int32_t h = (int32_t)mag_tensor_height(t);
int32_t c = (int32_t)mag_tensor_channels(t);
mag_assert2(w && h && c == 3);
float r = (float)((rgb>>16)&0xff) / 255.0f;
float g = (float)((rgb>>8)&0xff) / 255.0f;
Expand Down Expand Up @@ -2771,9 +2899,9 @@ void mag_tensor_img_draw_text(mag_tensor_t* t, int32_t x, int32_t y, int32_t siz
mag_assert2(x >= 0 && y >= 0 && size >= 8 && txt && *txt);
mag_assert2(t->ctx->device_type == MAG_COMPUTE_DEVICE_TYPE_CPU);
float* buf = (float*)t->storage.base;
int32_t w = (int32_t)mag_tensor_image_width(t);
int32_t h = (int32_t)mag_tensor_image_height(t);
int32_t c = (int32_t)mag_tensor_image_channels(t);
int32_t w = (int32_t)mag_tensor_width(t);
int32_t h = (int32_t)mag_tensor_height(t);
int32_t c = (int32_t)mag_tensor_channels(t);
mag_assert2(w && h && c == 3);
float* pr = buf;
float* pg = buf + w*h;
Expand Down Expand Up @@ -3706,9 +3834,9 @@ void mag_tensor_save_image(const mag_tensor_t* t, const char* file) {
mag_assert(saver, "Image saver not set");
int64_t rank = mag_tensor_rank(t);
mag_assert(rank == 3, "Tensor rank must be 3, but is: %" PRIi64, (size_t)rank);
int64_t w = mag_tensor_image_width(t);
int64_t h = mag_tensor_image_height(t);
int64_t c = mag_tensor_image_channels(t);
int64_t w = mag_tensor_width(t);
int64_t h = mag_tensor_height(t);
int64_t c = mag_tensor_channels(t);
mag_assert(c == 1 || c == 3 || c == 4, "Invalid number of channels: %zu", (size_t)c);
mag_assert(w*h*c == mag_tensor_numel(t), "Buffer size mismatch: %zu != %zu", w*h*c, (size_t)mag_tensor_numel(t));
uint8_t* dst = (*mag_alloc)(NULL, w*h*c); /* Allocate memory for image data */
Expand All @@ -3722,54 +3850,84 @@ void mag_tensor_save_image(const mag_tensor_t* t, const char* file) {
mag_log_info("Saved tensor to image: %s, width: %d, height: %d, channels: %d", file, (int)w, (int)h, (int)c);
}

static void mag_graphviz_dump(const mag_tensor_t* node, FILE *fp, const mag_tensor_t*** visited, size_t* len, size_t* cap) { /* TODO this function generates a wrong non DAG sometimes, needs to be fixed */
for (size_t i = 0; i < *len; ++i)
if ((*visited)[i] == node)
return;
if (*len >= *cap)
*visited = (*mag_alloc)(*visited, ((*cap <<= 1) * sizeof(mag_tensor_t*)));
(*visited)[*len] = node;
(*len)++;
bool is_input = true;
for (unsigned i = 0; i < MAG_MAX_INPUT_TENSORS; i++) {
if (node->op_inputs[i] != NULL) {
is_input = false;
break;
static MAG_COLDPROC void mag_graphviz_dump_node_edge(FILE* f, const mag_tensor_t* node, const mag_tensor_t* parent, const char* label) {
const mag_tensor_t* gparent = NULL;
const mag_tensor_t* gparent0 = NULL;
fprintf(
f,
" \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
gparent0 ? (void*)gparent0 : (void*)parent,
gparent0 ? "g" : "x",
gparent ? (void*)gparent : (void*)node,
gparent ? "g" : "x",
gparent ? "empty" : "vee",
gparent ? "dashed" : "solid",
label
);
}

static void mag_graphviz_dump_leaf_edge(FILE* f, const mag_tensor_t* node, const mag_tensor_t* parent, const char* label) {
fprintf(
f,
" \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n",
(void*)parent, "x",
(void*)node, "x",
label
);
}

static MAG_COLDPROC void mag_tensor_dump_folded_graph(
const mag_tensor_t** nodes,
size_t n_nodes,
const mag_tensor_t** leafs,
size_t n_leafs,
void* ud
) {
FILE* f = ud;
for (size_t i=0; i < n_nodes; ++i) {
const mag_tensor_t* node = nodes[i];
fprintf(f, " \"%p\" [ style = filled; fillcolor = %s; shape = record; label=\"", (void*)node, "skyblue2");
fprintf(f, "\"; ]\n");
}
for (size_t i=0; i < n_leafs; ++i) {
const mag_tensor_t* node = leafs[i];
fprintf(f, " \"%p\" [ style = filled; fillcolor = %s; shape = record; label=\"<x>", (void*)node, "pink");
fprintf(f, "\"; ]\n");
}
for (size_t i=0; i < n_nodes; ++i) {
const mag_tensor_t* node = nodes[i];
for (size_t j=0; j < MAG_MAX_INPUT_TENSORS; ++j) {
const mag_tensor_t* input = node->op_inputs[j];
if (input) {
char label[16];
snprintf(label, sizeof(label), "src %zu", j);
mag_graphviz_dump_node_edge(f, node, input, label);
}
}
}
const char* fillcolor = is_input ? "palegreen" : "skyblue2";
char dim_buf[150];
mag_fmt_dims(&dim_buf, &node->shape, node->rank);
bool gra = node->flags & MAG_TFLAG_REQUIRES_GRAD;
fprintf(fp, " \"%p\" [label=\"⊕ %s|%s|S %s|F 0x%x\", shape=record, style=\"rounded,filled\", fillcolor=%s];\n", (void*)node, mag_op_meta_of(node->op)->mnemonic, gra ? "→∇" : "X", dim_buf, node->flags, fillcolor);
const char* vars = "xy";
mag_assert2(strlen(vars) == MAG_MAX_INPUT_TENSORS);
for (unsigned i = 0; i < MAG_MAX_INPUT_TENSORS; ++i) {
mag_tensor_t* input = node->op_inputs[i];
if (!input)
continue;
char name[64];
if (*input->name)
snprintf(name, sizeof(name), "%s", input->name);
else
snprintf(name, sizeof(name), "%c", vars[i]);
fprintf(fp, " \"%p\" -> \"%p\" [label=\"%s\"];\n", (void*)input, (void*)node, name);
mag_graphviz_dump(input, fp, visited, len, cap);
for (size_t i=0; i < n_leafs; ++i) {
const mag_tensor_t* node = leafs[i];
for (size_t j=0; j < MAG_MAX_INPUT_TENSORS; ++j) {
const mag_tensor_t* input = node->op_inputs[j];
if (input) {
char label[16];
snprintf(label, sizeof(label), "src %zu", j);
mag_graphviz_dump_leaf_edge(f, node, input, label);
}
}
}
}

MAG_COLDPROC void mag_tensor_export_graphviz(const mag_tensor_t* t, const char* file) {
mag_assert2(t && file && *file);
FILE* f = mag_fopen(file, "w");
size_t len = 0, cap = 128;
const mag_tensor_t** visited = (*mag_alloc)(NULL, cap * sizeof(mag_tensor_t*));
FILE* f = mag_fopen(file, "wt");
fprintf(f, "digraph computation_graph {\n");
fprintf(f, " rankdir=LR;\n");
fprintf(f, " newrank=true;\n");
fprintf(f, " rankdir=TD;\n");
fprintf(f, " node [fontname=\"Helvetica\", shape=box];\n");
fprintf(f, " edge [fontname=\"Helvetica\"];\n");
mag_graphviz_dump(t, f, &visited, &len, &cap);
mag_compute_graph_fold(0xffff, t, &mag_tensor_dump_folded_graph, true, f);
fprintf(f, "}\n");
(*mag_alloc)(visited, 0);
fclose(f);
}

Expand Down
6 changes: 3 additions & 3 deletions magnetron/magnetron.h
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,9 @@ extern MAG_EXPORT mag_tensor_t* mag_tensor_load(mag_ctx_t* ctx, const char* file
extern MAG_EXPORT mag_tensor_t* mag_tensor_load_image(mag_ctx_t* ctx, const char* file, mag_color_channels_t channels, uint32_t resize_w, uint32_t resize_h); /* Create a tensor from an image file. */
extern MAG_EXPORT void mag_tensor_save_image(const mag_tensor_t* t, const char* file); /* Save tensor data as an image */
extern MAG_EXPORT void mag_tensor_export_graphviz(const mag_tensor_t* t, const char* file); /* Export tensor computation graph as Graphviz DOT file */
#define mag_tensor_image_width(tensor) (mag_tensor_shape(tensor)[2]) /* Get image width from tensor */
#define mag_tensor_image_height(tensor) (mag_tensor_shape(tensor)[1]) /* Get image height from tensor */
#define mag_tensor_image_channels(tensor) (mag_tensor_shape(tensor)[0]) /* Get image channels from tensor */
#define mag_tensor_width(tensor) (mag_tensor_shape(tensor)[2]) /* Get image width from tensor */
#define mag_tensor_height(tensor) (mag_tensor_shape(tensor)[1]) /* Get image height from tensor */
#define mag_tensor_channels(tensor) (mag_tensor_shape(tensor)[0]) /* Get image channels from tensor */

#ifdef __cplusplus
}
Expand Down
3 changes: 2 additions & 1 deletion python/examples/xor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@ def forward(self, x: Tensor) -> Tensor:
[0]
], name='y')

epochs: int = 2000
epochs: int = 1

y_hat = model(x)
print(y_hat)
for epoch in range(epochs):
y_hat = model(x)
loss = mse_loss(y_hat, y)
loss.export_graphviz('loss.dot')
loss.backward()
optim.step()
optim.zero_grad()
Expand Down
2 changes: 1 addition & 1 deletion test/unit/autograd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ TEST(autograd, bin_ops2) {
mag_tensor_set_requires_grad(y, true);
mag_tensor_backward(y);

//mag_tensor_export_graphviz(y, "autograd2.dot");
mag_tensor_export_graphviz(y, "autograd2.dot");

// check forward pass
float vx = mag_tensor_get_scalar_virtual_index(x, 0);
Expand Down
18 changes: 9 additions & 9 deletions test/unit/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ TEST(image, load) {

mag_tensor_t* img = mag_tensor_load_image(ctx, "test_data/test_img.png", MAG_COLOR_CHANNELS_RGB, 0, 0);
mag_tensor_print(img, true, true);
ASSERT_EQ(mag_tensor_image_channels(img), 3);
ASSERT_EQ(mag_tensor_image_width(img), 2);
ASSERT_EQ(mag_tensor_image_height(img), 4);
ASSERT_EQ(mag_tensor_shape(img)[2], mag_tensor_image_width(img));
ASSERT_EQ(mag_tensor_shape(img)[1], mag_tensor_image_height(img));
ASSERT_EQ(mag_tensor_shape(img)[0], mag_tensor_image_channels(img)); // RGB
ASSERT_EQ(mag_tensor_channels(img), 3);
ASSERT_EQ(mag_tensor_width(img), 2);
ASSERT_EQ(mag_tensor_height(img), 4);
ASSERT_EQ(mag_tensor_shape(img)[2], mag_tensor_width(img));
ASSERT_EQ(mag_tensor_shape(img)[1], mag_tensor_height(img));
ASSERT_EQ(mag_tensor_shape(img)[0], mag_tensor_channels(img)); // RGB

auto* buf = mag_tensor_data_ptr(img);
for (int64_t i=0; i < mag_tensor_numel(img); ++i) {
Expand All @@ -33,9 +33,9 @@ TEST(image, load_resize) {
ASSERT_EQ(mag_tensor_shape(img)[2], 256);
ASSERT_EQ(mag_tensor_shape(img)[1], 211);
ASSERT_EQ(mag_tensor_shape(img)[0], 3); // RGB
ASSERT_EQ(mag_tensor_shape(img)[2], mag_tensor_image_width(img));
ASSERT_EQ(mag_tensor_shape(img)[1], mag_tensor_image_height(img));
ASSERT_EQ(mag_tensor_shape(img)[0], mag_tensor_image_channels(img)); // RGB
ASSERT_EQ(mag_tensor_shape(img)[2], mag_tensor_width(img));
ASSERT_EQ(mag_tensor_shape(img)[1], mag_tensor_height(img));
ASSERT_EQ(mag_tensor_shape(img)[0], mag_tensor_channels(img)); // RGB

// mag_tensor_save_image(img, "test_data/car_resized.jpg");

Expand Down

0 comments on commit 624fbed

Please sign in to comment.