Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
1148a72
HLSL 6.0 ... 6.9
soufianekhiat Mar 25, 2026
a333169
Missing header
soufianekhiat Mar 26, 2026
8270fda
Fix JIT tests and add support for GPU Textures
soufianekhiat Mar 26, 2026
9c4ddaa
Add tests to validate HLSL 6.x features
soufianekhiat Mar 26, 2026
b880b5c
Fix Clang Tidy.
soufianekhiat Mar 26, 2026
997ccbd
Apply pre-commit auto-fixes
halide-ci[bot] Mar 26, 2026
3ad196f
Add strict_float
soufianekhiat Mar 27, 2026
b650a16
Retrigger GHA
alexreinking Mar 26, 2026
1925554
Apply pre-commit auto-fixes
halide-ci[bot] Mar 26, 2026
1c99661
Appease clang-tidy
alexreinking Mar 27, 2026
2eba11b
Implement strict float intrinsics for D3D12
alexreinking Mar 27, 2026
58223e6
Address PR review: fix DXC codegen bugs
soufianekhiat Mar 27, 2026
2486419
Remove duplicate visit(const Shuffle *op)
soufianekhiat Mar 28, 2026
37f4d92
Fix uint16 cast errors
soufianekhiat Mar 30, 2026
0096e32
Fix CL part 2
soufianekhiat Mar 30, 2026
7f1eb3f
Fix CI Part 3
soufianekhiat Mar 31, 2026
768b61a
CI++
soufianekhiat Mar 31, 2026
7ed2dc6
CI
soufianekhiat Mar 31, 2026
bc85661
shorter name for cross compilation and fix GUID
soufianekhiat Apr 1, 2026
8f17c7d
Fix comment and mismatch naming
soufianekhiat Apr 1, 2026
1820ba3
Too aggressive for replace_all
soufianekhiat Apr 1, 2026
90168c0
Rename + fix warning of implicit cast
soufianekhiat Apr 1, 2026
331f7ce
rename
soufianekhiat Apr 1, 2026
16efab1
Fix for correctness_mul_div_mod
soufianekhiat Apr 1, 2026
463c8c6
skip 64 bits buffer for: correctness_{gpu_mixed_shared_mem_types, mat…
soufianekhiat Apr 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions python_bindings/src/halide/halide_/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ void define_enums(py::module &m) {
.value("AVX10_1", Target::Feature::AVX10_1)
.value("X86APX", Target::Feature::X86APX)
.value("Simulator", Target::Feature::Simulator)
.value("HLSL_SM60", Target::Feature::HLSL_SM60)
.value("HLSL_SM61", Target::Feature::HLSL_SM61)
.value("HLSL_SM62", Target::Feature::HLSL_SM62)
.value("HLSL_SM63", Target::Feature::HLSL_SM63)
.value("HLSL_SM64", Target::Feature::HLSL_SM64)
.value("HLSL_SM65", Target::Feature::HLSL_SM65)
.value("HLSL_SM66", Target::Feature::HLSL_SM66)
.value("HLSL_SM67", Target::Feature::HLSL_SM67)
.value("HLSL_SM68", Target::Feature::HLSL_SM68)
.value("HLSL_SM69", Target::Feature::HLSL_SM69)
.value("FeatureEnd", Target::Feature::FeatureEnd);

py::enum_<halide_type_code_t>(m, "TypeCode")
Expand Down
660 changes: 580 additions & 80 deletions src/CodeGen_D3D12Compute_Dev.cpp

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion src/DeviceInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,11 @@ Expr make_device_interface_call(DeviceAPI device_api, MemoryType memory_type) {
interface_name = "halide_hexagon_dma_device_interface";
break;
case DeviceAPI::D3D12Compute:
interface_name = "halide_d3d12compute_device_interface";
if (memory_type == MemoryType::GPUTexture) {
interface_name = "halide_d3d12compute_image_device_interface";
} else {
interface_name = "halide_d3d12compute_device_interface";
}
break;
case DeviceAPI::Vulkan:
interface_name = "halide_vulkan_device_interface";
Expand Down
135 changes: 130 additions & 5 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,16 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"trace_realizations", Target::TraceRealizations},
{"trace_pipeline", Target::TracePipeline},
{"d3d12compute", Target::D3D12Compute},
{"hlsl_sm60", Target::HLSL_SM60},
{"hlsl_sm61", Target::HLSL_SM61},
{"hlsl_sm62", Target::HLSL_SM62},
{"hlsl_sm63", Target::HLSL_SM63},
{"hlsl_sm64", Target::HLSL_SM64},
{"hlsl_sm65", Target::HLSL_SM65},
{"hlsl_sm66", Target::HLSL_SM66},
{"hlsl_sm67", Target::HLSL_SM67},
{"hlsl_sm68", Target::HLSL_SM68},
{"hlsl_sm69", Target::HLSL_SM69},
{"strict_float", Target::StrictFloat},
{"tsan", Target::TSAN},
{"asan", Target::ASAN},
Expand Down Expand Up @@ -1135,6 +1145,22 @@ void Target::validate_features() const {
VSX,
});
}

// D3D12Compute SM version features require D3D12Compute to also be set.
if (!has_feature(D3D12Compute)) {
do_check_bad(*this, {
HLSL_SM60,
HLSL_SM61,
HLSL_SM62,
HLSL_SM63,
HLSL_SM64,
HLSL_SM65,
HLSL_SM66,
HLSL_SM67,
HLSL_SM68,
HLSL_SM69,
});
}
}

Target::Target(const std::string &target) {
Expand Down Expand Up @@ -1378,6 +1404,43 @@ int Target::get_vulkan_capability_lower_bound() const {
return 10;
}

int Target::get_d3d12compute_capability_lower_bound() const {
if (!has_feature(Target::D3D12Compute)) {
return -1;
}
if (has_feature(Target::HLSL_SM60)) {
return 60;
}
if (has_feature(Target::HLSL_SM61)) {
return 61;
}
if (has_feature(Target::HLSL_SM62)) {
return 62;
}
if (has_feature(Target::HLSL_SM63)) {
return 63;
}
if (has_feature(Target::HLSL_SM64)) {
return 64;
}
if (has_feature(Target::HLSL_SM65)) {
return 65;
}
if (has_feature(Target::HLSL_SM66)) {
return 66;
}
if (has_feature(Target::HLSL_SM67)) {
return 67;
}
if (has_feature(Target::HLSL_SM68)) {
return 68;
}
if (has_feature(Target::HLSL_SM69)) {
return 69;
}
return 51; // default: SM 5.1 (FXC)
}

int Target::get_arm_v8_lower_bound() const {
if (has_feature(Target::ARMv8a)) {
return 80;
Expand Down Expand Up @@ -1416,13 +1479,13 @@ bool Target::supports_type(const Type &t) const {
if (t.bits() == 64) {
if (t.is_float()) {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(D3D12Compute) || get_d3d12compute_capability_lower_bound() >= 60) &&
(!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) &&
!has_feature(WebGPU));
} else {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(D3D12Compute) || get_d3d12compute_capability_lower_bound() >= 60) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) &&
!has_feature(WebGPU));
}
Expand Down Expand Up @@ -1450,9 +1513,18 @@ bool Target::supports_type(const Type &t, DeviceAPI device) const {
return has_feature(Target::CLDoubles);
}
} else if (device == DeviceAPI::D3D12Compute) {
// Shader Model 5.x can optionally support double-precision; 64-bit int
// types are not supported.
return t.bits() < 64;
// SM 5.1 (FXC): no 64-bit types. float16 and int8 work via widening.
// SM 6.0+: 64-bit int and float (double, int64_t, uint64_t) supported.
// SM 6.2+: native 16-bit float (float16_t) and int (int16_t, uint16_t).
// SM 6.6+: native 8-bit int (int8_t, uint8_t). Earlier SMs widen to int32.
// SM 6.9+: long vectors (5–1024 lanes) via vector<T, N> syntax.
if (t.bits() == 64) {
return get_d3d12compute_capability_lower_bound() >= 60;
}
if (t.lanes() > 4) {
return get_d3d12compute_capability_lower_bound() >= 69;
}
return true;
} else if (device == DeviceAPI::Vulkan) {
if (t.is_float() && t.bits() == 64) {
return has_feature(Target::VulkanFloat64);
Expand Down Expand Up @@ -1653,6 +1725,17 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
VulkanV12,
VulkanV13,

HLSL_SM60,
HLSL_SM61,
HLSL_SM62,
HLSL_SM63,
HLSL_SM64,
HLSL_SM65,
HLSL_SM66,
HLSL_SM67,
HLSL_SM68,
HLSL_SM69,

ARMv8a,
ARMv81a,
ARMv82a,
Expand Down Expand Up @@ -1787,6 +1870,43 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
output.features.reset(VulkanV13);
}

// Pick tight lower bound for D3D12Compute SM version. Use fall-through to clear redundant features
int d3d12_sm_a = get_d3d12compute_capability_lower_bound();
int d3d12_sm_b = other.get_d3d12compute_capability_lower_bound();

// Same trick as CUDA: -1 (unused) becomes large when cast to unsigned, so min gives the true lower bound.
int d3d12_sm = std::min((unsigned)d3d12_sm_a, (unsigned)d3d12_sm_b);
if (d3d12_sm < 60) {
output.features.reset(HLSL_SM60);
}
if (d3d12_sm < 61) {
output.features.reset(HLSL_SM61);
}
if (d3d12_sm < 62) {
output.features.reset(HLSL_SM62);
}
if (d3d12_sm < 63) {
output.features.reset(HLSL_SM63);
}
if (d3d12_sm < 64) {
output.features.reset(HLSL_SM64);
}
if (d3d12_sm < 65) {
output.features.reset(HLSL_SM65);
}
if (d3d12_sm < 66) {
output.features.reset(HLSL_SM66);
}
if (d3d12_sm < 67) {
output.features.reset(HLSL_SM67);
}
if (d3d12_sm < 68) {
output.features.reset(HLSL_SM68);
}
if (d3d12_sm < 69) {
output.features.reset(HLSL_SM69);
}

// Pick tight lower bound for HVX version. Use fall-through to clear redundant features
int hvx_a = get_hvx_lower_bound(*this);
int hvx_b = get_hvx_lower_bound(other);
Expand Down Expand Up @@ -1874,6 +1994,11 @@ void target_test() {
{{"hexagon-32-qurt-hvx_v62", "hexagon-32-qurt", "hexagon-32-qurt"}},
{{"hexagon-32-qurt-hvx_v62-hvx", "hexagon-32-qurt", ""}},
{{"hexagon-32-qurt-hvx_v62-hvx", "hexagon-32-qurt-hvx", "hexagon-32-qurt-hvx"}},
{{"x86-64-windows-d3d12compute-hlsl_sm66", "x86-64-windows-d3d12compute", "x86-64-windows-d3d12compute"}},
{{"x86-64-windows-d3d12compute-hlsl_sm66", "x86-64-windows-d3d12compute-hlsl_sm60", "x86-64-windows-d3d12compute-hlsl_sm60"}},
{{"x86-64-windows-d3d12compute-hlsl_sm62", "x86-64-windows-d3d12compute-hlsl_sm62", "x86-64-windows-d3d12compute-hlsl_sm62"}},
{{"x86-64-windows-d3d12compute-hlsl_sm69", "x86-64-windows-d3d12compute", "x86-64-windows-d3d12compute"}},
{{"x86-64-windows-d3d12compute-hlsl_sm69", "x86-64-windows-d3d12compute-hlsl_sm60", "x86-64-windows-d3d12compute-hlsl_sm60"}},
};

for (const auto &test : gcd_tests) {
Expand Down
15 changes: 15 additions & 0 deletions src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,16 @@ struct Target {
AVX10_1 = halide_target_feature_avx10_1,
X86APX = halide_target_feature_x86_apx,
Simulator = halide_target_feature_simulator,
HLSL_SM60 = halide_target_feature_hlsl_sm60,
HLSL_SM61 = halide_target_feature_hlsl_sm61,
HLSL_SM62 = halide_target_feature_hlsl_sm62,
HLSL_SM63 = halide_target_feature_hlsl_sm63,
HLSL_SM64 = halide_target_feature_hlsl_sm64,
HLSL_SM65 = halide_target_feature_hlsl_sm65,
HLSL_SM66 = halide_target_feature_hlsl_sm66,
HLSL_SM67 = halide_target_feature_hlsl_sm67,
HLSL_SM68 = halide_target_feature_hlsl_sm68,
HLSL_SM69 = halide_target_feature_hlsl_sm69,
FeatureEnd = halide_target_feature_end
};
Target() = default;
Expand Down Expand Up @@ -349,6 +359,11 @@ struct Target {
* features are set. */
int get_vulkan_capability_lower_bound() const;

/** Get the minimum D3D12Compute Shader Model version as an integer,
* encoded as major * 10 + minor (e.g. 60 for SM 6.0, 62 for SM 6.2).
* When more than one HLSL_SM6x feature is set, the lowest one is
* reported. Returns 51 (SM 5.1, FXC path) if no SM 6.x features are
* set, or -1 if D3D12Compute is not enabled. */
int get_d3d12compute_capability_lower_bound() const;

/** Get the minimum ARM v8.x capability found as an integer. Returns
* -1 if no ARM v8.x features are set. */
int get_arm_v8_lower_bound() const;
Expand Down
10 changes: 10 additions & 0 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1478,6 +1478,16 @@ typedef enum halide_target_feature_t {
halide_target_feature_avx10_1, ///< Intel AVX10 version 1 support. vector_bits is used to indicate width.
halide_target_feature_x86_apx, ///< Intel x86 APX support. Covers initial set of features released as APX: egpr,push2pop2,ppx,ndd .
halide_target_feature_simulator, ///< Target is for a simulator environment. Currently only applies to iOS.
halide_target_feature_hlsl_sm60, ///< Enable D3D12 Shader Model 6.0 (DXIL, 64-bit types, wave intrinsics). Requires d3d12compute. Uses DXC compiler.
halide_target_feature_hlsl_sm61, ///< Enable D3D12 Shader Model 6.1
halide_target_feature_hlsl_sm62, ///< Enable D3D12 Shader Model 6.2 (native 16-bit scalar types with -enable-16bit-types)
halide_target_feature_hlsl_sm63, ///< Enable D3D12 Shader Model 6.3
halide_target_feature_hlsl_sm64, ///< Enable D3D12 Shader Model 6.4
halide_target_feature_hlsl_sm65, ///< Enable D3D12 Shader Model 6.5
halide_target_feature_hlsl_sm66, ///< Enable D3D12 Shader Model 6.6 (64-bit atomics, packed 8-bit types)
halide_target_feature_hlsl_sm67, ///< Enable D3D12 Shader Model 6.7
halide_target_feature_hlsl_sm68, ///< Enable D3D12 Shader Model 6.8
halide_target_feature_hlsl_sm69, ///< Enable D3D12 Shader Model 6.9 (long vectors 5-1024 lanes, native 16-bit/wave/int64 required)
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
} halide_target_feature_t;

Expand Down
3 changes: 3 additions & 0 deletions src/runtime/HalideRuntimeD3D12Compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
extern "C" {
#endif

#define HALIDE_RUNTIME_D3D12COMPUTE

/** \file
* Routines specific to the Halide Direct3D 12 Compute runtime.
*/

extern const struct halide_device_interface_t *halide_d3d12compute_device_interface();
extern const struct halide_device_interface_t *halide_d3d12compute_image_device_interface();

/** These are forward declared here to allow clients to override the
* Halide Direct3D 12 Compute runtime. Do not call them. */
Expand Down
Loading