Commit: final cleanup

MarioSieg committed Feb 18, 2025
1 parent ca0b7ca commit 5342fd4
Showing 8 changed files with 38 additions and 33 deletions.
benchmark/benchmarks.cpp (2 changes: 0 additions & 2 deletions)

@@ -34,8 +34,6 @@ auto main() -> int {
         .unit("MM")
         .warmup(100)
         .performanceCounters(true);
-
-    bench_op(bench, 20000);
     bench_op(bench, 10000);
     bench_op(bench, 1000);
     bench_op(bench, 750);
cmake/blas_tune.cmake (21 changes: 10 additions & 11 deletions)

@@ -9,30 +9,29 @@ function(set_blas_spec_arch filename posix_arch msvc_arch)
 endif()
 endfunction()
 
-set(MAGNETRON_BLAS_SPEC_AMD64_SOURCES
+set(MAG_BLAS_SPEC_AMD64_SOURCES
     magnetron/magnetron_cpu_blas_amd64_v2.c
     magnetron/magnetron_cpu_blas_amd64_v2_5.c
     magnetron/magnetron_cpu_blas_amd64_v3.c
     magnetron/magnetron_cpu_blas_amd64_v4.c
     magnetron/magnetron_cpu_blas_amd64_v4_5.c
 )
 
-set(MAGNETRON_BLAS_SPEC_ARM64_SOURCES
+set(MAG_BLAS_SPEC_ARM64_SOURCES
     magnetron/magnetron_cpu_blas_arm64_v8_2.c
     magnetron/magnetron_cpu_blas_arm64_v9.c
 )
 
 if(${IS_AMD64}) # x86-64 specific compilation options
     include(CheckCCompilerFlag)
-    # Check for support of -mavx512bf16, some older GCC versions don't support it
-    check_c_compiler_flag("-mavx512bf16" COMPILER_SUPPORTS_MAVX512BF16)
-    if(NOT COMPILER_SUPPORTS_MAVX512BF16)
-        message(WARNING "Compiler does NOT support -mavx512bf16; removing it from the flags for magnetron_cpu_blas_amd64_v4_5.c")
-        set(AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
+    check_c_compiler_flag("-mavx512bf16" MAG_COMPILER_SUPPORTS_MAVX512BF16) # Check for support of -mavx512bf16, some older GCC versions don't support it
+    if(NOT MAG_COMPILER_SUPPORTS_MAVX512BF16)
+        message(WARNING "Compiler does NOT support -mavx512bf16. Please upgrade to a newer compiler. Some optimizations are disabled.")
+        set(MAG_AMD64_V45_AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
     else()
-        set(AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx512bf16 -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
+        set(MAG_AMD64_V45_AVX512_FLAGS "-mtune=generic -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx512vnni -mavx512bf16 -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe")
     endif()
-    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_AMD64_SOURCES})
+    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAG_BLAS_SPEC_AMD64_SOURCES})
     set_blas_spec_arch("magnetron_cpu_blas_amd64_v2.c"
         "-mtune=nehalem -mcx16 -mpopcnt -msse3 -mssse3 -msse4.1 -msse4.2"
         "/arch:SSE4.2")
@@ -46,10 +45,10 @@ if(${IS_AMD64}) # x86-64 specific compilation options
         "-mtune=cannonlake -mavx512f -mavx512bw -mavx512vl -mavx512dq -mavx -mavx2 -mbmi -mbmi2 -mf16c -mfma -mlzcnt -mmovbe"
         "/arch:AVX512")
     set_blas_spec_arch("magnetron_cpu_blas_amd64_v4_5.c"
-        "${AVX512_FLAGS}"
+        "${MAG_AMD64_V45_AVX512_FLAGS}"
         "/arch:AVX512")
 elseif(${IS_ARM64})
-    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAGNETRON_BLAS_SPEC_ARM64_SOURCES})
+    set(MAGNETRON_SOURCES ${MAGNETRON_SOURCES} ${MAG_BLAS_SPEC_ARM64_SOURCES})
     set_blas_spec_arch("magnetron_cpu_blas_arm64_v8_2.c" "-march=armv8.2-a+dotprod+fp16" "")
     set_blas_spec_arch("magnetron_cpu_blas_arm64_v9.c" "-march=armv9-a+sve+sve2" "")
 endif()
python/benchmarks/bench_tool.py (2 changes: 1 addition & 1 deletion)

@@ -130,7 +130,7 @@ def plot(self, flops_per_op: int = 2, plot_style: str = 'bars') -> None:
         ax2.legend()
         ax2.grid(True)
 
-        plt.suptitle(f'{mag.Context.active().cpu_name}', y=1.05)
+        plt.suptitle(f'{mag.Context.primary().cpu_name}', y=1.05)
         plt.tight_layout()
         plt.show()
 
python/examples/xor.py (5 changes: 3 additions & 2 deletions)

@@ -23,15 +23,16 @@ def forward(self, x: Tensor) -> Tensor:
 
 y = Tensor.const([[0], [1], [1], [0]], name='y')
 
-epochs: int = 2
+epochs: int = 1
 
 y_hat = model(x)
 print(y_hat)
 for epoch in range(epochs):
     y_hat = model(x)
     loss = mse_loss(y_hat, y)
     loss.backward()
-    loss.export_graphviz(f'xor_{epoch}.dot')
+    if epoch == 0:
+        loss.export_graphviz(f'xor_{epoch}.dot')
     optim.step()
     optim.zero_grad()
     if epoch % 1000 == 0:
python/src/magnetron/core.py (31 changes: 16 additions & 15 deletions)

@@ -5,6 +5,7 @@
 import weakref
 from dataclasses import dataclass
 from enum import Enum, auto, unique
+from functools import lru_cache
 from os import getenv
 from os.path import isfile
 
@@ -83,14 +84,14 @@ class GlobalConfig:
 
 @typing.final
 class Context:
-    _active: 'Context' = None
     """Manages the execution context and owns all tensors and active compute devices."""
 
     @staticmethod
-    def active() -> 'Context':
-        if Context._active is None:
-            C.mag_set_log_mode(GlobalConfig.verbose)
-            Context._active = Context(GlobalConfig.compute_device)
-        return Context._active
+    @lru_cache(maxsize=1)
+    def primary() -> 'Context':
+        """Get global context singleton."""
+        C.mag_set_log_mode(GlobalConfig.verbose)
+        return Context(GlobalConfig.compute_device)
 
     def __init__(
         self,
@@ -212,11 +213,11 @@ class no_grad(contextlib.ContextDecorator):
 
     def __enter__(self) -> None:
         """Disable gradient tracking by stopping the active context's recorder."""
-        Context.active().stop_grad_recorder()
+        Context.primary().stop_grad_recorder()
 
     def __exit__(self, exc_type: any, exc_value: any, traceback: any) -> None:
         """Re-enable gradient tracking when exiting the context."""
-        Context.active().start_grad_recorder()
+        Context.primary().start_grad_recorder()
 
 
 class Tensor:
@@ -276,7 +277,7 @@ def empty(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -296,7 +297,7 @@ def full(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -334,7 +335,7 @@ def flatten_nested_lists(nested: object) -> tuple[tuple[int, ...], list[float]]:
         shape, flattened_data = flatten_nested_lists(data)
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -371,7 +372,7 @@ def uniform(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=dtype,
             requires_grad=requires_grad,
@@ -394,7 +395,7 @@ def normal(
     ) -> 'Tensor':
         tensor = cls(None)
         tensor._new(
-            Context.active(),
+            Context.primary(),
             shape=shape,
             dtype=DType.F32,
             requires_grad=requires_grad,
@@ -406,7 +407,7 @@
     @classmethod
     def load(cls, file_path: str) -> 'Tensor':
         assert file_path.endswith('.magnetron'), 'File must be a magnetron file'
-        instance = C.mag_tensor_load(Context.active()._ptr, bytes(file_path, 'utf-8'))
+        instance = C.mag_tensor_load(Context.primary()._ptr, bytes(file_path, 'utf-8'))
         return cls(ptr=instance)
 
     @classmethod
@@ -420,7 +421,7 @@ def load_image(
     ) -> 'Tensor':
         assert isfile(file_path), f'File not found: {file_path}'
         instance = C.mag_tensor_load_image(
-            Context.active()._ptr,
+            Context.primary()._ptr,
             bytes(file_path, 'utf-8'),
             channels.value,
             resize_to[0],
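A note on the Context change above: stacking @staticmethod on top of functools.lru_cache(maxsize=1) replaces the hand-rolled _active cache with a memoized factory. The first call constructs the context and sets the log mode; every later call replays the cached instance. A minimal, self-contained sketch of the same pattern (Widget is a hypothetical stand-in for Context, not part of the codebase):

from functools import lru_cache

class Widget:
    """Hypothetical stand-in for magnetron's Context."""

    def __init__(self) -> None:
        print('constructed')  # runs exactly once per process

    @staticmethod
    @lru_cache(maxsize=1)
    def primary() -> 'Widget':
        # lru_cache memoizes this zero-argument call, so the body
        # executes once and the cached instance is returned afterwards.
        return Widget()

a = Widget.primary()
b = Widget.primary()
assert a is b  # one instance per process, like the old _active singleton

Decorator order matters here: lru_cache must wrap the plain function, with staticmethod outermost, which is exactly how the diff stacks them.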
python/tests/context.py (6 changes: 5 additions & 1 deletion)

@@ -2,10 +2,14 @@
 
 from magnetron import *
 
+GlobalConfig.verbose = True
+
 
 def test_context_creation() -> None:
     # Test that a context can be created and defaults are correct.
-    ctx = Context.active()
+    ctx = Context.primary()
+    ctx = Context.primary()
+    ctx = Context.primary()
     assert ctx.execution_mode.name in ('EAGER', 'DEFERRED')
     assert isinstance(ctx.os_name, str)
     assert isinstance(ctx.cpu_name, str)
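The three back-to-back Context.primary() calls exercise the memoization path; every call after the first is a cache hit. Note also that C.mag_set_log_mode(GlobalConfig.verbose) now runs inside the cached body, so verbosity has to be configured before the context is first requested, which is why the test sets GlobalConfig.verbose = True at import time. A short sketch of the intended ordering (assuming the star-import exposes GlobalConfig and Context, as the test does):

from magnetron import GlobalConfig, Context

GlobalConfig.verbose = True   # must precede the first primary() call
ctx1 = Context.primary()      # constructs the context and latches the log mode
ctx2 = Context.primary()      # cache hit: identical object
assert ctx1 is ctx2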
python/tests/tensor_ops3.py (1 change: 1 addition & 0 deletions)

@@ -147,6 +147,7 @@ def test_matmul_scalar_by_matrix() -> None:
     assert mag_result.shape == np_result.shape
     np.testing.assert_allclose(tonumpy(mag_result), np_result, atol=EPS)
 
+
 """
 def test_matmul_x_transposed() -> None:
     shape_a = (4, 2)
python/tests/xor_nn.py (3 changes: 2 additions & 1 deletion)

@@ -4,7 +4,7 @@
 from magnetron import Tensor, Context
 
 np.random.seed(932002)
-Context.active().seed(932002)
+Context.primary().seed(932002)
 
 LR: float = 0.1
 EPOCHS: int = 10000
@@ -107,6 +107,7 @@ def predict(x: Tensor) -> Tensor:
 
     return [predict(Tensor.const([xr]))[0] for xr in INPUT]
 
+
 """
 def test_xor_nn() -> None:
     np_out = xor_nn_np()
