intel
diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h
Lines changed: 6 additions & 1 deletion
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
Lines changed: 2 additions & 0 deletions
diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp
Lines changed: 4 additions & 4 deletions
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
Lines changed: 64 additions & 52 deletions
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
Lines changed: 9 additions & 1 deletion
diff --git a/sycl-fusion/common/include/Kernel.h b/sycl-fusion/common/include/Kernel.h
Lines changed: 1 addition & 1 deletion
diff --git a/sycl-fusion/common/lib/KernelIO.h b/sycl-fusion/common/lib/KernelIO.h
Lines changed: 1 addition & 0 deletions
diff --git a/sycl-fusion/jit-compiler/CMakeLists.txt b/sycl-fusion/jit-compiler/CMakeLists.txt
Lines changed: 11 additions & 1 deletion
diff --git a/sycl-fusion/jit-compiler/include/JITContext.h b/sycl-fusion/jit-compiler/include/JITContext.h
Lines changed: 12 additions & 5 deletions
diff --git a/sycl-fusion/jit-compiler/include/Options.h b/sycl-fusion/jit-compiler/include/Options.h
Lines changed: 6 additions & 1 deletion
@@ -660,9 +660,14 @@ class OffloadUnbundlingJobAction final : public JobAction {
 class OffloadWrapperJobAction : public JobAction {
   void anchor() override;
 
+  bool EmbedIR;
+
 public:
   OffloadWrapperJobAction(ActionList &Inputs, types::ID Type);
-  OffloadWrapperJobAction(Action *Input, types::ID OutputType);
+  OffloadWrapperJobAction(Action *Input, types::ID OutputType,
+                          bool EmbedIR = false);
+
+  bool isEmbeddedIR() const { return EmbedIR; }
 
   static bool classof(const Action *A) {
     return A->getKind() == OffloadWrapperJobClass;
 
@@ -2973,6 +2973,8 @@ def fintelfpga : Flag<["-"], "fintelfpga">, Group<f_Group>,
   HelpText<"Perform ahead-of-time compilation for FPGA">;
 def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
   HelpText<"Compile SYCL kernels for device">;
+def fsycl_embed_ir : Flag<["-"], "fsycl-embed-ir">, Flags<[CoreOption]>,
+  HelpText<"Embed LLVM IR for runtime kernel fusion">;
 defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-mem",
     LangOpts<"SYCLESIMDForceStatelessMem">, DefaultFalse,
     PosFlag<SetTrue, [], "Enforce using stateless memory accesses. "
 
@@ -478,11 +478,11 @@ void OffloadWrapperJobAction::anchor() {}
 
 OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs,
                                                  types::ID Type)
-  : JobAction(OffloadWrapperJobClass, Inputs, Type) {}
+    : JobAction(OffloadWrapperJobClass, Inputs, Type), EmbedIR(false) {}
 
-OffloadWrapperJobAction::OffloadWrapperJobAction(Action *Input,
-                                                 types::ID Type)
-    : JobAction(OffloadWrapperJobClass, Input, Type) {}
+OffloadWrapperJobAction::OffloadWrapperJobAction(Action *Input, types::ID Type,
+                                                 bool IsEmbeddedIR)
+    : JobAction(OffloadWrapperJobClass, Input, Type), EmbedIR(IsEmbeddedIR) {}
 
 void OffloadPackagerJobAction::anchor() {}
 
 
@@ -5516,6 +5516,8 @@ class OffloadingActionBuilder final {
         //   s - device code split requested
         //   r - relocatable device code is requested
         //   f - link object output type is TY_Tempfilelist (fat archive)
+        //   e - Embedded IR for fusion (-fsycl-embed-ir) was requested
+        //       and target is NVPTX.
         //   * - "all other cases"
         //     - no condition means output/input is "always" present
         // First symbol indicates output/input type
@@ -5535,58 +5537,58 @@ class OffloadingActionBuilder final {
         //                |             |
         //                |             |
         //         .---------------------------------------.
-        //         |               PostLink                |
-        //         .---------------------------------------.
-        //                           [+*]                [+]
-        //                             |                  |
-        //                             |                  |
-        //                             |---------         |
-        //                             |        |         |
-        //                             |        |         |
-        //                             |      [+!rf]      |
-        //                             |  .-------------. |
-        //                             |  | llvm-foreach| |
-        //                             |  .-------------. |
-        //                             |        |         |
-        //                            [+*]    [+!rf]      |
-        //                      .-----------------.       |
-        //                      | FileTableTform  |       |
-        //                      | (extract "Code")|       |
-        //                      .-----------------.       |
-        //                              [-]               |-----------
-        //           --------------------|                           |
-        //           |                   |                           |
-        //           |                   |-----------------          |
-        //           |                   |                |          |
-        //           |                   |               [-!rf]      |
-        //           |                   |         .--------------.  |
-        //           |                   |         |FileTableTform|  |
-        //           |                   |         |   (merge)    |  |
-        //           |                   |         .--------------.  |
-        //           |                   |               [-]         |-------
-        //           |                   |                |          |      |
-        //           |                   |                |    ------|      |
-        //           |                   |        --------|    |            |
-        //          [.]                 [-*]   [-!rf]        [+!rf]         |
-        //   .---------------.  .-------------------. .--------------.      |
-        //   | finalizeNVPTX  | |  SPIRVTranslator  | |FileTableTform|      |
-        //   | finalizeAMDGCN | |                   | |   (merge)    |      |
-        //   .---------------.  .-------------------. . -------------.      |
-        //          [.]             [-as]      [-!a]         |              |
-        //           |                |          |           |              |
-        //           |              [-s]         |           |              |
-        //           |       .----------------.  |           |              |
-        //           |       | BackendCompile |  |           |              |
-        //           |       .----------------.  |     ------|              |
-        //           |              [-s]         |     |                    |
-        //           |                |          |     |                    |
-        //           |              [-a]      [-!a]  [-!rf]                 |
-        //           |              .--------------------.                  |
-        //           -----------[-n]|   FileTableTform   |[+*]--------------|
-        //                          |  (replace "Code")  |
-        //                          .--------------------.
-        //                                      |
-        //                                    [+*]
+        //         |               PostLink                |[+e]----------------
+        //         .---------------------------------------.                   |
+        //                           [+*]                [+]                   |
+        //                             |                  |                    |
+        //                             |                  |                    |
+        //                             |---------         |                    |
+        //                             |        |         |                    |
+        //                             |        |         |                    |
+        //                             |      [+!rf]      |                    |
+        //                             |  .-------------. |                    |
+        //                             |  | llvm-foreach| |                    |
+        //                             |  .-------------. |                    |
+        //                             |        |         |                    |
+        //                            [+*]    [+!rf]      |                    |
+        //                      .-----------------.       |                    |
+        //                      | FileTableTform  |       |                    |
+        //                      | (extract "Code")|       |                    |
+        //                      .-----------------.       |                    |
+        //                              [-]               |-----------         |
+        //           --------------------|                           |         |
+        //           |                   |                           |         |
+        //           |                   |-----------------          |         |
+        //           |                   |                |          |         |
+        //           |                   |               [-!rf]      |         |
+        //           |                   |         .--------------.  |         |
+        //           |                   |         |FileTableTform|  |         |
+        //           |                   |         |   (merge)    |  |         |
+        //           |                   |         .--------------.  |         |
+        //           |                   |               [-]         |-------  |
+        //           |                   |                |          |      |  |
+        //           |                   |                |    ------|      |  |
+        //           |                   |        --------|    |            |  |
+        //          [.]                 [-*]   [-!rf]        [+!rf]         |  |
+        //   .---------------.  .-------------------. .--------------.      |  |
+        //   | finalizeNVPTX  | |  SPIRVTranslator  | |FileTableTform|      |  |
+        //   | finalizeAMDGCN | |                   | |   (merge)    |      |  |
+        //   .---------------.  .-------------------. .--------------.      |  |
+        //          [.]             [-as]      [-!a]         |              |  |
+        //           |                |          |           |              |  |
+        //           |              [-s]         |           |              |  |
+        //           |       .----------------.  |           |              |  |
+        //           |       | BackendCompile |  |           |              |  |
+        //           |       .----------------.  |     ------|              |  |
+        //           |              [-s]         |     |                    |  |
+        //           |                |          |     |                    |  |
+        //           |              [-a]      [-!a]  [-!rf]                 |  |
+        //           |              .--------------------.                  |  |
+        //           -----------[-n]|   FileTableTform   |[+*]--------------|  |
+        //                          |  (replace "Code")  |                     |
+        //                          .--------------------.                     |
+        //                                      |      -------------------------
+        //                                    [+*]     | [+e]
         //         .--------------------------------------.
         //         |            OffloadWrapper            |
         //         .--------------------------------------.
@@ -5693,6 +5695,16 @@ class OffloadingActionBuilder final {
             return TypedPostLinkAction;
           };
           Action *PostLinkAction = createPostLinkAction();
+          if (isNVPTX && Args.hasArg(options::OPT_fsycl_embed_ir)) {
+            // When compiling for Nvidia/CUDA devices and the user requested the
+            // IR to be embedded in the application (via option), run the output
+            // of sycl-post-link (filetable referencing LLVM Bitcode + symbols)
+            // through the offload wrapper and link the resulting object to the
+            // application.
+            auto *WrapBitcodeAction = C.MakeAction<OffloadWrapperJobAction>(
+                PostLinkAction, types::TY_Object, true);
+            DA.add(*WrapBitcodeAction, *TC, BoundArch, Action::OFK_SYCL);
+          }
           bool NoRDCFatStaticArchive =
               !IsRDC &&
               FullDeviceLinkAction->getType() == types::TY_Tempfilelist;
 
@@ -9272,6 +9272,14 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
       createArgString("-link-opts=");
     }
 
+    bool IsEmbeddedIR = cast<OffloadWrapperJobAction>(JA).isEmbeddedIR();
+    if (IsEmbeddedIR) {
+      // When the offload-wrapper is called to embed LLVM IR, add a prefix to
+      // the target triple to distinguish the LLVM IR from the actual device
+      // binary for that target.
+      TargetTripleOpt = ("llvm_" + TargetTripleOpt).str();
+    }
+
     WrapperArgs.push_back(
         C.getArgs().MakeArgString(Twine("-target=") + TargetTripleOpt));
 
@@ -9293,7 +9301,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
     assert(I.isFilename() && "Invalid input.");
 
     if (I.getType() == types::TY_Tempfiletable ||
-        I.getType() == types::TY_Tempfilelist)
+        I.getType() == types::TY_Tempfilelist || IsEmbeddedIR)
       // wrapper actual input files are passed via the batch job file table:
       WrapperArgs.push_back(C.getArgs().MakeArgString("-batch"));
     WrapperArgs.push_back(C.getArgs().MakeArgString(I.getFilename()));
 
@@ -34,7 +34,7 @@ enum class ParameterKind : uint32_t {
 };
 
 /// Different binary formats supported as input to the JIT compiler.
-enum class BinaryFormat : uint32_t { INVALID, LLVM, SPIRV };
+enum class BinaryFormat : uint32_t { INVALID, LLVM, SPIRV, PTX };
 
 /// Information about a device intermediate representation module (e.g., SPIR-V,
 /// LLVM IR) from DPC++.
 
@@ -47,6 +47,7 @@ template <> struct ScalarEnumerationTraits<jit_compiler::BinaryFormat> {
   static void enumeration(IO &IO, jit_compiler::BinaryFormat &BF) {
     IO.enumCase(BF, "LLVM", jit_compiler::BinaryFormat::LLVM);
     IO.enumCase(BF, "SPIRV", jit_compiler::BinaryFormat::SPIRV);
+    IO.enumCase(BF, "PTX", jit_compiler::BinaryFormat::PTX);
     IO.enumCase(BF, "INVALID", jit_compiler::BinaryFormat::INVALID);
   }
 };
 
@@ -2,13 +2,15 @@
 add_llvm_library(sycl-fusion
    lib/KernelFusion.cpp
    lib/JITContext.cpp
+   lib/translation/KernelTranslation.cpp
    lib/translation/SPIRVLLVMTranslation.cpp
    lib/fusion/FusionPipeline.cpp
    lib/fusion/FusionHelper.cpp
    lib/fusion/ModuleHelper.cpp
    lib/helper/ConfigHelper.cpp
 
-  LINK_COMPONENTS
+   LINK_COMPONENTS
+   BitReader
    Core
    Support
    Analysis
@@ -18,6 +20,10 @@ add_llvm_library(sycl-fusion
    Linker
    ScalarOpts
    InstCombine
+   Target
+   TargetParser
+   MC
+   ${LLVM_TARGETS_TO_BUILD}
 )
 
 target_include_directories(sycl-fusion
@@ -40,6 +46,10 @@ target_link_libraries(sycl-fusion
   ${CMAKE_THREAD_LIBS_INIT}
 )
 
+if("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
+  target_compile_definitions(sycl-fusion PRIVATE FUSION_JIT_SUPPORT_PTX)
+endif()
+
 if (BUILD_SHARED_LIBS)
   if(NOT MSVC AND NOT APPLE)
     # Manage symbol visibility through the linker to make sure no LLVM symbols
 
@@ -36,17 +36,21 @@ using CacheKeyT =
                std::optional<std::vector<NDRange>>>;
 
 ///
-/// Wrapper around a SPIR-V binary.
-class SPIRVBinary {
+/// Wrapper around a kernel binary.
+class KernelBinary {
 public:
-  explicit SPIRVBinary(std::string Binary);
+  explicit KernelBinary(std::string &&Binary, BinaryFormat Format);
 
   jit_compiler::BinaryAddress address() const;
 
   size_t size() const;
 
+  BinaryFormat format() const;
+
 private:
   std::string Blob;
+
+  BinaryFormat Format;
 };
 
 ///
@@ -61,7 +65,10 @@ class JITContext {
 
   llvm::LLVMContext *getLLVMContext();
 
-  SPIRVBinary &emplaceSPIRVBinary(std::string Binary);
+  template <typename... Ts> KernelBinary &emplaceKernelBinary(Ts &&...Args) {
+    WriteLockT WriteLock{BinariesMutex};
+    return Binaries.emplace_back(std::forward<Ts>(Args)...);
+  }
 
   std::optional<SYCLKernelInfo> getCacheEntry(CacheKeyT &Identifier) const;
 
@@ -79,7 +86,7 @@ class JITContext {
 
   MutexT BinariesMutex;
 
-  std::vector<SPIRVBinary> Binaries;
+  std::vector<KernelBinary> Binaries;
 
   mutable MutexT CacheMutex;
 
 
@@ -9,12 +9,14 @@
 #ifndef SYCL_FUSION_JIT_COMPILER_OPTIONS_H
 #define SYCL_FUSION_JIT_COMPILER_OPTIONS_H
 
+#include "Kernel.h"
+
 #include <memory>
 #include <unordered_map>
 
 namespace jit_compiler {
 
-enum OptionID { VerboseOutput, EnableCaching };
+enum OptionID { VerboseOutput, EnableCaching, TargetFormat };
 
 class OptionPtrBase {};
 
@@ -78,6 +80,9 @@ struct JITEnableVerbose : public OptionBase<OptionID::VerboseOutput, bool> {};
 
 struct JITEnableCaching : public OptionBase<OptionID::EnableCaching, bool> {};
 
+struct JITTargetFormat
+    : public OptionBase<OptionID::TargetFormat, BinaryFormat> {};
+
 } // namespace option
 } // namespace jit_compiler
Original file line number	Diff line number	Diff line change
`@@ -47,6 +47,7 @@ template <> struct ScalarEnumerationTraits<jit_compiler::BinaryFormat> {`
`47`	`47`	`static void enumeration(IO &IO, jit_compiler::BinaryFormat &BF) {`
`48`	`48`	`IO.enumCase(BF, "LLVM", jit_compiler::BinaryFormat::LLVM);`
`49`	`49`	`IO.enumCase(BF, "SPIRV", jit_compiler::BinaryFormat::SPIRV);`
	`50`	`+ IO.enumCase(BF, "PTX", jit_compiler::BinaryFormat::PTX);`
`50`	`51`	`IO.enumCase(BF, "INVALID", jit_compiler::BinaryFormat::INVALID);`
`51`	`52`	`}`
`52`	`53`	`};`