Commit: final cleanup

MarioSieg committed Feb 18, 2025
1 parent ca0b7ca commit 5342fd4
Showing 8 changed files with 38 additions and 33 deletions.
benchmark/benchmarks.cpp (2 changes: 0 additions & 2 deletions)

@@ -34,8 +34,6 @@ auto main() -> int {
         .unit("MM")
         .warmup(100)
         .performanceCounters(true);
-
-    bench_op(bench, 20000);
     bench_op(bench, 10000);
     bench_op(bench, 1000);
     bench_op(bench, 750);
cmake/blas_tune.cmake (21 changes: 10 additions & 11 deletions)

@@ -9,30 +9,29 @@ function(set_blas_spec_arch filename posix_arch msvc_arch)
 endif()
 endfunction()
 
-set(MAGNETRON_BLAS_SPEC_AMD64_SOURCES
+set(MAG_BLAS_SPEC_AMD64_SOURCES
     magnetron/magnetron_cpu_blas_amd64_v2.c
     magnetron/magnetron_cpu_blas_amd64_v2_5.c
     magnetron/magnetron_cpu_blas_amd64_v3.c
     magnetron/magnetron_cpu_blas_amd64_v4.c
     magnetron/magnetron_cpu_blas_amd64_v4_5.c
 )
 
-set(MAGNETRON_BLAS_SPEC_ARM64_SOURCES
+set(MAG_BLAS_SPEC_ARM64_SOURCES
     magnetron/magnetron_cpu_blas_arm64_v8_2.c
     magnetron/magnetron_cpu_blas_arm64_v9.c
 )
 
 if(${IS_AMD64}) # x86-64 specific compilation options
     include(CheckCCompilerFlag)
-    # Check for support of -mavx512bf16, some older GCC versions don't support it
-    check_c_compiler_flag("-mavx512bf16" COMPILER_SUPPORTS_MAVX512BF16)
-    if(NOT COMPILER_SUPPORTS_MAVX512BF16)
-        message(WARNING "Compiler does NOT support -mavx512bf16; removing it from the flags for magnetron_cpu_blas_amd64_v4_5.c")
-        set(AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
+    check_c_compiler_flag("-mavx512bf16" MAG_COMPILER_SUPPORTS_MAVX512BF16) # Check for support of -mavx512bf16, some older GCC versions don't support it
+    if(NOT MAG_COMPILER_SUPPORTS_MAVX512BF16)
+        message(WARNING "Compiler does NOT support -mavx512bf16. Please upgrade to a newer compiler. Some optimizations are disabled.")
+        set(MAG_AMD64_V45_AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
     else()
-        set(AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx512bf16 -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
+        set(MAG_AMD64_V45_AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx512bf16 -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
     endif()
-    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_AMD64_SOURCES})
+    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAG_BLAS_SPEC_AMD64_SOURCES})
     set_blas_spec_arch("magnetron_cpu_blas_amd64_v2.c"
         "-mtune=nehalem -mcx16 -mpopcnt -msse3 -mssse3 -msse4.1 -msse4.2"
         "/arch:SSE4.2")
@@ -46,10 +45,10 @@ if(${IS_AMD64}) # x86-64 specific compilation options
         "-mtune=cannonlake -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe"
         "/arch:AVX512")
     set_blas_spec_arch("magnetron_cpu_blas_amd64_v4_5.c"
-        "${AVX512_FLAGS}"
+        "${MAG_AMD64_V45_AVX512_FLAGS}"
         "/arch:AVX512")
 elseif(${IS_ARM64})
-    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_ARM64_SOURCES})
+    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAG_BLAS_SPEC_ARM64_SOURCES})
     set_blas_spec_arch("magnetron_cpu_blas_arm64_v8_2.c" "-march=armv8.2-a+dotprod+fp16" "")
     set_blas_spec_arch("magnetron_cpu_blas_arm64_v9.c" "-march=armv9-a+sve+sve2" "")
 endif()
python/benchmarks/bench_tool.py (2 changes: 1 addition & 1 deletion)

@@ -130,7 +130,7 @@ def plot(self, flops_per_op: int = 2, plot_style: str = 'bars') -> None:
         ax2.legend()
         ax2.grid(True)
 
-        plt.suptitle(f'{mag.Context.active().cpu_name}', y=1.05)
+        plt.suptitle(f'{mag.Context.primary().cpu_name}', y=1.05)
         plt.tight_layout()
         plt.show()
 
python/examples/xor.py (5 changes: 3 additions & 2 deletions)

@@ -23,15 +23,16 @@ def forward(self, x: Tensor) -> Tensor:
 
 y = Tensor.const([[0], [1], [1], [0]], name='y')
 
-epochs: int = 2
+epochs: int = 1
 
 y_hat = model(x)
 print(y_hat)
 for epoch in range(epochs):
     y_hat = model(x)
     loss = mse_loss(y_hat, y)
     loss.backward()
-    loss.export_graphviz(f'xor_{epoch}.dot')
+    if epoch == 0:
+        loss.export_graphviz(f'xor_{epoch}.dot')
     optim.step()
     optim.zero_grad()
     if epoch % 1000 == 0:
python/src/magnetron/core.py (31 changes: 16 additions & 15 deletions)

@@ -5,6 +5,7 @@
 import weakref
 from dataclasses import dataclass
 from enum import Enum, auto, unique
+from functools import lru_cache
 from os import getenv
 from os.path import isfile
 
@@ -83,14 +84,14 @@ class GlobalConfig:
 
 @typing.final
 class Context:
-    _active: 'Context' = None
     """Manages the execution context and owns all tensors and active compute devices."""
 
     @staticmethod
-    def active() -> 'Context':
-        if Context._active is None:
-            C.mag_set_log_mode(GlobalConfig.verbose)
-            Context._active = Context(GlobalConfig.compute_device)
-        return Context._active
+    @lru_cache(maxsize=1)
+    def primary() -> 'Context':
+        """Get global context singleton."""
+        C.mag_set_log_mode(GlobalConfig.verbose)
+        return Context(GlobalConfig.compute_device)
 
     def __init__(
         self,
@@ -212,11 +213,11 @@ class no_grad(contextlib.ContextDecorator):
 
     def __enter__(self) -> None:
         """Disable gradient tracking by stopping the active context's recorder."""
-        Context.active().stop_grad_recorder()
+        Context.primary().stop_grad_recorder()
 
     def __exit__(self, exc_type: any, exc_value: any, traceback: any) -> None:
         """Re-enable gradient tracking when exiting the context."""
-        Context.active().start_grad_recorder()
+        Context.primary().start_grad_recorder()
 
 
 class Tensor:
@@ -276,7 +277,7 @@ def empty(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -296,7 +297,7 @@ def full(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -334,7 +335,7 @@ def flatten_nested_lists(nested: object) -> tuple[tuple[int, ...], list[float]]:
         shape, flattened_data = flatten_nested_lists(data)
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -371,7 +372,7 @@ def uniform(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -394,7 +395,7 @@ def normal(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=DType.F32,
             requires_grad=requires_grad,
@@ -406,7 +407,7 @@
     @classmethod
     def load(cls, file_path: str) -> 'Tensor':
         assert file_path.endswith('.magnetron'), 'File must be a magnetron file'
-        instance = C.mag_tensor_load(Context.active()._ptr, bytes(file_path, 'utf-8'))
+        instance = C.mag_tensor_load(Context.primary()._ptr, bytes(file_path, 'utf-8'))
         return cls(ptr=instance)
 
     @classmethod
@@ -420,7 +421,7 @@ def load_image(
     ) -> 'Tensor':
         assert isfile(file_path), f'File not found: {file_path}'
         instance = C.mag_tensor_load_image(
-            Context.active()._ptr,
+            Context.primary()._ptr,
             bytes(file_path, 'utf-8'),
             channels.value,
             resize_to[0],
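A note on the Context change above: stacking @staticmethod on top of functools.lru_cache(maxsize=1) replaces the hand-rolled _active cache with a memoized factory. The first call constructs the context and sets the log mode; every later call replays the cached instance. A minimal, self-contained sketch of the same pattern (Widget is a hypothetical stand-in for Context, not part of the codebase):

from functools import lru_cache

class Widget:
    """Hypothetical stand-in for magnetron's Context."""

    def __init__(self) -> None:
        print('constructed')  # runs exactly once per process

    @staticmethod
    @lru_cache(maxsize=1)
    def primary() -> 'Widget':
        # lru_cache memoizes this zero-argument call, so the body
        # executes once and the cached instance is returned afterwards.
        return Widget()

a = Widget.primary()
b = Widget.primary()
assert a is b  # one instance per process, like the old _active singleton

Decorator order matters here: lru_cache must wrap the plain function, with staticmethod outermost, which is exactly how the diff stacks them.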
python/tests/context.py (6 changes: 5 additions & 1 deletion)

@@ -2,10 +2,14 @@
 
 from magnetron import *
 
+GlobalConfig.verbose = True
+
 
 def test_context_creation() -> None:
     # Test that a context can be created and defaults are correct.
-    ctx = Context.active()
+    ctx = Context.primary()
+    ctx = Context.primary()
+    ctx = Context.primary()
     assert ctx.execution_mode.name in ('EAGER', 'DEFERRED')
     assert isinstance(ctx.os_name, str)
     assert isinstance(ctx.cpu_name, str)
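The three back-to-back Context.primary() calls exercise the memoization path; every call after the first is a cache hit. Note also that C.mag_set_log_mode(GlobalConfig.verbose) now runs inside the cached body, so verbosity has to be configured before the context is first requested, which is why the test sets GlobalConfig.verbose = True at import time. A short sketch of the intended ordering (assuming the star-import exposes GlobalConfig and Context, as the test does):

from magnetron import GlobalConfig, Context

GlobalConfig.verbose = True   # must precede the first primary() call
ctx1 = Context.primary()      # constructs the context and latches the log mode
ctx2 = Context.primary()      # cache hit: identical object
assert ctx1 is ctx2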
python/tests/tensor_ops3.py (1 change: 1 addition & 0 deletions)

@@ -147,6 +147,7 @@ def test_matmul_scalar_by_matrix() -> None:
     assert mag_result.shape == np_result.shape
     np.testing.assert_allclose(tonumpy(mag_result), np_result, atol=EPS)
 
+
 """
 def test_matmul_x_transposed() -> None:
     shape_a = (4, 2)
python/tests/xor_nn.py (3 changes: 2 additions & 1 deletion)

@@ -4,7 +4,7 @@
 from magnetron import Tensor, Context
 
 np.random.seed(932002)
-Context.active().seed(932002)
+Context.primary().seed(932002)
 
 LR: float = 0.1
 EPOCHS: int = 10000
@@ -107,6 +107,7 @@ def predict(x: Tensor) -> Tensor:
 
     return [predict(Tensor.const([xr]))[0] for xr in INPUT]
 
+
 """
 def test_xor_nn() -> None:
     np_out = xor_nn_np()
